From 9dfeefbf82588bf79a1cf159c6aafe852c97adbd Mon Sep 17 00:00:00 2001 From: liu946 Date: Sat, 3 Jun 2017 03:43:46 +0800 Subject: [PATCH 01/22] srl update --- .gitignore | 3 +- src/console/CMakeLists.txt | 22 +- src/console/dispatcher.h | 18 + src/console/srl_cmdline.cpp | 180 ++ src/ltp/LTPResource.cpp | 539 ++-- src/ltp/LTPResource.h | 204 +- src/ltp/Ltp.cpp | 969 +++--- src/ltp/Ltp.h | 269 +- src/server/CMakeLists.txt | 6 +- src/server/ltp_server.cpp | 144 +- src/srl/CMakeLists.txt | 100 +- src/srl/Configuration.cpp | 134 - src/srl/Configuration.h | 147 - src/srl/ConstVar.h | 120 - src/srl/Corpus.cpp | 63 - src/srl/Corpus.h | 39 - src/srl/DataPreProcess.cpp | 139 - src/srl/DataPreProcess.h | 40 - src/srl/DataStruct.h | 76 - src/srl/DepSRL.cpp | 938 +----- src/srl/DepSRL.h | 199 +- src/srl/FeatureExtractor.cpp | 1492 --------- src/srl/FeatureExtractor.h | 424 --- src/srl/GetInstance.cpp | 181 -- src/srl/GetInstance.h | 72 - src/srl/MyStruct.h | 97 - src/srl/MyTree.cpp | 669 ---- src/srl/MyTree.h | 74 - src/srl/Pi/CMakeLists.txt | 14 + src/srl/Pi/config/SrlPiConfig.h | 67 + src/srl/Pi/model/SrlPiModel.h | 123 + src/srl/Pi/pred.cpp | 14 + src/srl/Pi/process/PredSrlPi.cpp | 5 + src/srl/Pi/process/PredSrlPi.h | 53 + src/srl/Pi/process/TrainSrlPi.cpp | 5 + src/srl/Pi/process/TrainSrlPi.h | 53 + src/srl/Pi/train.cpp | 14 + src/srl/SRLBaseline.cpp | 62 - src/srl/SRLBaseline.h | 49 - src/srl/SRLBaselineExt.cpp | 352 --- src/srl/SRLBaselineExt.h | 47 - src/srl/SRL_DLL.cpp | 98 +- src/srl/SRL_DLL.h | 38 +- src/srl/SRL_DLL_x.cpp | 23 - src/srl/Sentence.cpp | 245 -- src/srl/Sentence.h | 185 -- src/srl/Srl/CMakeLists.txt | 14 + src/srl/Srl/config/SrlSrlConfig.h | 69 + src/srl/Srl/model/SrlSrlModel.cpp | 125 + src/srl/Srl/model/SrlSrlModel.h | 87 + src/srl/Srl/pred.cpp | 14 + src/srl/Srl/process/PredSrlSrl.cpp | 5 + src/srl/Srl/process/PredSrlSrl.h | 57 + src/srl/Srl/process/TrainSrlSrl.cpp | 5 + src/srl/Srl/process/TrainSrlSrl.h | 54 + src/srl/Srl/train.cpp | 14 + 
src/srl/common/CMakeLists.txt | 14 + src/srl/common/Const.h | 21 + src/srl/common/config/ModelConf.h | 84 + .../extractor/ConverterDataToSrlPiSample.h | 53 + .../extractor/ConverterFileContextToWordEmb.h | 65 + .../common/extractor/ExtractorFileToWordEmb.h | 45 + .../model/AffineTransformModelBuilder.cpp | 17 + .../model/AffineTransformModelBuilder.h | 67 + src/srl/common/model/BaseLabelModel.h | 278 ++ src/srl/common/model/BiRNNModelBuilder.h | 156 + src/srl/common/model/CNN1dLayerBuilder.cpp | 53 + src/srl/common/model/CNN1dLayerBuilder.h | 52 + .../common/model/ConstLookupModelBuilder.h | 23 + src/srl/common/model/LabelModel.h | 40 + src/srl/common/model/LookupModelBuilder.h | 75 + src/srl/common/model/MLPModelBuilder.h | 57 + src/srl/common/model/ModelBuilder.h | 46 + src/srl/common/model/PiSrlModel.h | 54 + src/srl/common/model/RNNModelBuilder.h | 215 ++ src/srl/common/model/SeqLabelModel.h | 44 + src/srl/common/process/DynetPredictor.h | 31 + src/srl/common/process/DynetTrainer.h | 39 + .../process/LabelModelSGDSeqPredictor.h | 54 + .../common/process/LabelModelSGDSeqTrainer.h | 145 + src/srl/common/process/LabelModelSGDTrainer.h | 137 + src/srl/common/process/TrainStats.h | 58 + src/srl/common/structure/SrlPiSample.cpp | 9 + src/srl/common/structure/SrlPiSample.h | 46 + src/srl/common/structure/Word.h | 93 + src/srl/common/structure/WordEmbBuilder.h | 61 + src/srl/lgsrl.cpp | 746 ----- src/srl/options.h | 26 - src/srl/tool/CMakeLists.txt | 5 + src/srl/tool/config/ToolConf.h | 30 + src/srl/tool/merge.cpp | 12 + src/srl/tool/process/merge.h | 59 + src/srl/tree.hh | 2685 ----------------- thirdparty/jsoncpp/pkg-config/jsoncpp.pc.in | 11 + 94 files changed, 4626 insertions(+), 10329 deletions(-) create mode 100644 src/console/srl_cmdline.cpp delete mode 100644 src/srl/Configuration.cpp delete mode 100644 src/srl/Configuration.h delete mode 100644 src/srl/ConstVar.h delete mode 100644 src/srl/Corpus.cpp delete mode 100644 src/srl/Corpus.h delete mode 100644 
src/srl/DataPreProcess.cpp delete mode 100644 src/srl/DataPreProcess.h delete mode 100644 src/srl/DataStruct.h delete mode 100644 src/srl/FeatureExtractor.cpp delete mode 100644 src/srl/FeatureExtractor.h delete mode 100644 src/srl/GetInstance.cpp delete mode 100644 src/srl/GetInstance.h delete mode 100644 src/srl/MyStruct.h delete mode 100644 src/srl/MyTree.cpp delete mode 100644 src/srl/MyTree.h create mode 100644 src/srl/Pi/CMakeLists.txt create mode 100644 src/srl/Pi/config/SrlPiConfig.h create mode 100644 src/srl/Pi/model/SrlPiModel.h create mode 100644 src/srl/Pi/pred.cpp create mode 100644 src/srl/Pi/process/PredSrlPi.cpp create mode 100644 src/srl/Pi/process/PredSrlPi.h create mode 100644 src/srl/Pi/process/TrainSrlPi.cpp create mode 100644 src/srl/Pi/process/TrainSrlPi.h create mode 100644 src/srl/Pi/train.cpp delete mode 100644 src/srl/SRLBaseline.cpp delete mode 100644 src/srl/SRLBaseline.h delete mode 100644 src/srl/SRLBaselineExt.cpp delete mode 100644 src/srl/SRLBaselineExt.h delete mode 100644 src/srl/SRL_DLL_x.cpp delete mode 100644 src/srl/Sentence.cpp delete mode 100644 src/srl/Sentence.h create mode 100644 src/srl/Srl/CMakeLists.txt create mode 100644 src/srl/Srl/config/SrlSrlConfig.h create mode 100644 src/srl/Srl/model/SrlSrlModel.cpp create mode 100644 src/srl/Srl/model/SrlSrlModel.h create mode 100644 src/srl/Srl/pred.cpp create mode 100644 src/srl/Srl/process/PredSrlSrl.cpp create mode 100644 src/srl/Srl/process/PredSrlSrl.h create mode 100644 src/srl/Srl/process/TrainSrlSrl.cpp create mode 100644 src/srl/Srl/process/TrainSrlSrl.h create mode 100644 src/srl/Srl/train.cpp create mode 100644 src/srl/common/CMakeLists.txt create mode 100644 src/srl/common/Const.h create mode 100644 src/srl/common/config/ModelConf.h create mode 100644 src/srl/common/extractor/ConverterDataToSrlPiSample.h create mode 100644 src/srl/common/extractor/ConverterFileContextToWordEmb.h create mode 100644 src/srl/common/extractor/ExtractorFileToWordEmb.h create mode 
100644 src/srl/common/model/AffineTransformModelBuilder.cpp create mode 100644 src/srl/common/model/AffineTransformModelBuilder.h create mode 100644 src/srl/common/model/BaseLabelModel.h create mode 100644 src/srl/common/model/BiRNNModelBuilder.h create mode 100644 src/srl/common/model/CNN1dLayerBuilder.cpp create mode 100644 src/srl/common/model/CNN1dLayerBuilder.h create mode 100644 src/srl/common/model/ConstLookupModelBuilder.h create mode 100644 src/srl/common/model/LabelModel.h create mode 100644 src/srl/common/model/LookupModelBuilder.h create mode 100644 src/srl/common/model/MLPModelBuilder.h create mode 100644 src/srl/common/model/ModelBuilder.h create mode 100644 src/srl/common/model/PiSrlModel.h create mode 100644 src/srl/common/model/RNNModelBuilder.h create mode 100644 src/srl/common/model/SeqLabelModel.h create mode 100644 src/srl/common/process/DynetPredictor.h create mode 100644 src/srl/common/process/DynetTrainer.h create mode 100644 src/srl/common/process/LabelModelSGDSeqPredictor.h create mode 100644 src/srl/common/process/LabelModelSGDSeqTrainer.h create mode 100644 src/srl/common/process/LabelModelSGDTrainer.h create mode 100644 src/srl/common/process/TrainStats.h create mode 100644 src/srl/common/structure/SrlPiSample.cpp create mode 100644 src/srl/common/structure/SrlPiSample.h create mode 100644 src/srl/common/structure/Word.h create mode 100644 src/srl/common/structure/WordEmbBuilder.h delete mode 100644 src/srl/lgsrl.cpp delete mode 100644 src/srl/options.h create mode 100644 src/srl/tool/CMakeLists.txt create mode 100644 src/srl/tool/config/ToolConf.h create mode 100644 src/srl/tool/merge.cpp create mode 100644 src/srl/tool/process/merge.h delete mode 100644 src/srl/tree.hh create mode 100755 thirdparty/jsoncpp/pkg-config/jsoncpp.pc.in diff --git a/.gitignore b/.gitignore index 14dff34c5..95203610d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,7 @@ build ############### # output # ############### -#include/ +include/ lib/ bin/ 
tools/train/lgdpj @@ -39,6 +39,7 @@ tools/train/Debug/ ############### new_ltp_data/ ltp_data/ +ltp_data ################## # running folder # diff --git a/src/console/CMakeLists.txt b/src/console/CMakeLists.txt index 34f7ea5ce..2d2a4286e 100644 --- a/src/console/CMakeLists.txt +++ b/src/console/CMakeLists.txt @@ -3,11 +3,19 @@ include_directories (./ ${THIRDPARTY_DIR}/boost/include/ ${THIRDPARTY_DIR}/tinythreadpp ${THIRDPARTY_DIR}/maxent - ${THIRDPARTY_DIR}/tinyxml - ${THIRDPARTY_DIR}/jsoncpp/include) + ${THIRDPARTY_DIR}/tinyxml) set (ltp_test_SRC ltp_test.cpp ${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp) +# look for Boost +#if(DEFINED ENV{BOOST_ROOT}) +# set(Boost_NO_SYSTEM_PATHS ON) +#endif() +#set(Boost_REALPATH ON) +#find_package(Boost COMPONENTS program_options serialization REQUIRED) +#include_directories(${Boost_INCLUDE_DIR}) +#set(LIBS ${LIBS} ${Boost_LIBRARIES}) + link_directories ( ${LIBRARY_OUTPUT_PATH} ) add_executable (ltp_test ${ltp_test_SRC}) target_link_libraries (ltp_test @@ -17,12 +25,14 @@ target_link_libraries (ltp_test segmentor_static_lib postagger_static_lib parser_static_lib + lstm_sdparser_static_lib ner_static_lib srl_static_lib xml4nlp boost_regex_static_lib boost_program_options_static_lib - jsoncpp) + boost_serialization_static_lib + dynet) add_executable (cws_cmdline cws_cmdline.cpp ${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp) @@ -53,6 +63,12 @@ target_link_libraries (ner_cmdline ner_static_lib set_target_properties (ner_cmdline PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}/examples/) +add_executable (srl_cmdline srl_cmdline.cpp + ${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp) +target_link_libraries (srl_cmdline srl_static_lib + boost_program_options_static_lib) +set_target_properties (srl_cmdline PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${EXECUTABLE_OUTPUT_PATH}/examples/) if (NOT MSVC AND NOT MINGW) target_link_libraries (ltp_test pthread) diff --git a/src/console/dispatcher.h b/src/console/dispatcher.h index 
d01711e56..0bb6d99ed 100644 --- a/src/console/dispatcher.h +++ b/src/console/dispatcher.h @@ -3,9 +3,12 @@ #include #include +#include #include #include "tinythread.h" +using namespace std; + class Dispatcher { public: Dispatcher(void* engine, std::istream& is, std::ostream& os): @@ -24,6 +27,21 @@ class Dispatcher { return _max_idx ++; } + int next_block(vector& block) { + block.clear(); + tthread::lock_guard guard(_mutex); + std::string line; + while (std::getline(_is, line, '\n')) { + if (line != "") { + block.push_back(line); + } else { + return _max_idx ++; + } + } + if (block.size()) return _max_idx++; + return -1; + } + void output(const size_t& idx, const std::string& result) { tthread::lock_guard guard(_mutex); if (idx > _idx) { diff --git a/src/console/srl_cmdline.cpp b/src/console/srl_cmdline.cpp new file mode 100644 index 000000000..c4ed241bb --- /dev/null +++ b/src/console/srl_cmdline.cpp @@ -0,0 +1,180 @@ +// +// Created by yliu on 2017/5/24. +// + +#define EXECUTABLE "srl_cmdline" +#define DESCRIPTION "The console application for Semantic Role Labelling." 
+ +#include +#include +#include +#include "config.h" +#include "srl/SRL_DLL.h" +#include "console/dispatcher.h" +#include "boost/program_options.hpp" +#include "utils/strutils.hpp" +#include "utils/time.hpp" + +using boost::program_options::options_description; +using boost::program_options::value; +using boost::program_options::variables_map; +using boost::program_options::store; +using boost::program_options::parse_command_line; +using ltp::utility::WallClockTimer; +using ltp::strutils::split; + + +inline int findRoot(vector > & parse, pair edge) { + int begin = edge.first; + int end = edge.second; + for (int j = begin; j <= end; ++j) { + if (parse[j].first < begin || parse[j].first > end) { + return j; + } + } + return begin; +} + +void multithreaded_srl(void *args) { + + Dispatcher * dispatcher = (Dispatcher *)args; + + while (true) { + vector buffer; + int ret = dispatcher->next_block(buffer); + if (ret < 0) + break; + if (!buffer.size()){ + continue; + } + + std::vector words; + std::vector postags; + vector > parse; + vector > > > > vecSRLResult; + for (int j = 0; j < buffer.size(); ++j) { + std::stringstream S(buffer[j]); + string str; int parent; + S >> str; words.push_back(str); + S >> str; postags.push_back(str); + S >> parent; S >> str; parse.push_back(make_pair(parent, str)); + } + + srl_dosrl(words, postags, parse, vecSRLResult); + + vector > arg(words.size(), vector(vecSRLResult.size(), "_")); + vector is_pred(words.size(), false); + for (int k = 0; k < vecSRLResult.size(); ++k) { + is_pred[vecSRLResult[k].first] = true; + for (int j = 0; j < vecSRLResult[k].second.size(); ++j) { + arg[findRoot(parse, vecSRLResult[k].second[j].second)][k] = vecSRLResult[k].second[j].first; + } + } + + std::stringstream S; S.clear(); S.str(""); + for (size_t i = 0; i < words.size(); ++ i) { + S << i << "\t" << words[i] << "\t" << postags[i] << "\t" << parse[i].first << "\t" << parse[i].second; + S << "\t" << (is_pred[i] ? 
"Y" : "_"); + for (int j = 0; j < arg[i].size(); ++j) { + S << "\t" << arg[i][j]; + } + + S << std::endl; + } + dispatcher->output(ret, S.str()); + } + + return; + +} + +int main(int argc, char ** argv) { + std::string usage = EXECUTABLE " in LTP " LTP_VERSION " - " LTP_COPYRIGHT "\n"; + usage += DESCRIPTION "\n\n"; + usage += "usage: ./" EXECUTABLE " \n\n"; + usage += "options"; + + options_description optparser = options_description(usage); + optparser.add_options() + ("threads", value(), "The number of threads [default=1].") + ("input", value(), "The path to the input file. " + "Input data should contain one word each line. " + "Sentence should be separated by a blank line. " + "(e.g. \"中国 ns 2 ATT\").") + ("pisrl-model", value(), + "The path to the pi-srl joint model [default=ltp_data/pos.model].") + ("help,h", "Show help information"); + + if (argc == 1) { + std::cerr << optparser << std::endl; + return 1; + } + + variables_map vm; + store(parse_command_line(argc, argv, optparser), vm); + + if (vm.count("help")) { + std::cerr << optparser << std::endl; + return 0; + } + + int threads = 1; + if (vm.count("threads")) { + threads = vm["threads"].as(); + if (threads < 0) { + std::cerr << "number of threads should not less than 0, reset to 1." << std::endl; + threads = 1; + } + } + + std::string input = ""; + if (vm.count("input")) { input = vm["input"].as(); } + + std::string srl_model = "ltp_data/pos.model"; + if (vm.count("pisrl-model")) { + srl_model = vm["pisrl-model"].as(); + } + + std::string postagger_lexcion = ""; + if (vm.count("postagger-lexicon")) { + postagger_lexcion = vm["postagger-lexicon"].as(); + } + + if (srl_load_resource(srl_model)) { + return 1; + } + + std::cerr << "TRACE: Model is loaded" << std::endl; + std::cerr << "TRACE: Running " << threads << " thread(s)" << std::endl; + + std::ifstream ifs(input.c_str()); + std::istream* is = NULL; + + if (!ifs.good()) { + std::cerr << "WARN: Cann't open file! use stdin instead." 
<< std::endl; + is = (&std::cin); + } else { + is = (&ifs); + } + + Dispatcher * dispatcher = new Dispatcher( NULL, (*is), std::cout ); + WallClockTimer t; + std::list thread_list; + for (int i = 0; i < threads; ++ i) { + tthread::thread * t = new tthread::thread( multithreaded_srl, (void *)dispatcher ); + thread_list.push_back( t ); + } + + for (std::list::iterator i = thread_list.begin(); + i != thread_list.end(); ++ i) { + tthread::thread * t = *i; + t->join(); + delete t; + } + + std::cerr << "TRACE: consume " << t.elapsed() << " seconds." << std::endl; + delete dispatcher; + srl_release_resource(); + return 0; +} + diff --git a/src/ltp/LTPResource.cpp b/src/ltp/LTPResource.cpp index a7248df71..1b4be72c6 100644 --- a/src/ltp/LTPResource.cpp +++ b/src/ltp/LTPResource.cpp @@ -1,270 +1,269 @@ -#include "LTPResource.h" -#include "xml4nlp/Xml4nlp.h" -#include "splitsnt/SplitSentence.h" -#include "segmentor/segment_dll.h" -#include "postagger/postag_dll.h" -#include "parser.n/parser_dll.h" -#include "ner/ner_dll.h" -#include "srl/SRL_DLL.h" -#include "utils/logging.hpp" - -#if _WIN32 -#pragma warning(disable: 4786 4284) -#pragma comment(lib, "segmentor.lib") -#pragma comment(lib, "postagger.lib") -#pragma comment(lib, "parser.lib") -#pragma comment(lib, "ner.lib") -#pragma comment(lib, "srl.lib") -#endif - -LTPResource::LTPResource() : - m_segmentor(NULL), - m_postagger(NULL), - m_ner(NULL), - m_parser(NULL), - m_isSegmentorResourceLoaded(false), - m_isPostaggerResourceLoaded(false), - m_isNEResourceLoaded(false), - m_isParserResourceLoaded(false), - m_isSRLResourceLoaded(false) { -} - - -LTPResource::~LTPResource() { - ReleaseSegmentorResource(); - ReleasePostaggerResource(); - ReleaseNEResource(); - ReleaseParserResource(); - ReleaseSRLResource(); -} - -/* ======================================================== * - * Segmentor related resource management * - * ======================================================== */ -int LTPResource::LoadSegmentorResource(const 
char * model_file) { - if (m_isSegmentorResourceLoaded) { return 0; } - - INFO_LOG("Loading segmentor model from \"%s\" ...", model_file); - m_segmentor = segmentor_create_segmentor(model_file); - if (0 == m_segmentor) { - ERROR_LOG("Failed to load segmentor model"); - return -1; - } - - m_isSegmentorResourceLoaded = true; - INFO_LOG("segmentor model is loaded."); - return 0; -} - -int LTPResource::LoadSegmentorResource(const char* model_file, const char* lexicon) { - if (m_isSegmentorResourceLoaded) { return 0; } - - INFO_LOG("Loading segmentor model from \"%s\", \"%s\" ...", model_file, lexicon); - m_segmentor = segmentor_create_segmentor(model_file, lexicon); - if (0 == m_segmentor) { - ERROR_LOG("Failed to load segmentor model"); - return -1; - } - - m_isSegmentorResourceLoaded = true; - INFO_LOG("segmentor model is loaded."); - return 0; -} - -int LTPResource::LoadSegmentorResource(const std::string& model_file) { - return LoadSegmentorResource(model_file.c_str()); -} - -int LTPResource::LoadSegmentorResource(const std::string& model_file, - const std::string& lexicon) { - return LoadSegmentorResource(model_file.c_str(), lexicon.c_str()); -} - -void LTPResource::ReleaseSegmentorResource() { - if (!m_isSegmentorResourceLoaded) { return; } - - segmentor_release_segmentor(m_segmentor); - INFO_LOG("segmentor model is released."); - m_segmentor = 0; - m_isSegmentorResourceLoaded = false; -} - -void* LTPResource::GetSegmentor() { return m_segmentor; } - -/* ======================================================== * - * Postagger related resource management * - * ======================================================== */ -int LTPResource::LoadPostaggerResource(const char * model_file) { - if (m_isPostaggerResourceLoaded) { return 0; } - - INFO_LOG("Loading postagger model from \"%s\" ...", model_file); - m_postagger = postagger_create_postagger(model_file); - if (0 == m_postagger) { - ERROR_LOG("Failed to load postagger model"); - return -1; - } - - 
m_isPostaggerResourceLoaded = true; - INFO_LOG("postagger model is loaded"); - return 0; -} - - -int LTPResource::LoadPostaggerResource(const char* model_file, const char* lexicon) { - if (m_isPostaggerResourceLoaded) { return 0; } - - INFO_LOG("Loading postagger model from \"%s\" ...", model_file); - m_postagger = postagger_create_postagger(model_file); - if (0 == m_postagger) { - ERROR_LOG("Failed to load postagger model"); - return -1; - } - - m_isPostaggerResourceLoaded = true; - INFO_LOG("postagger model is loaded"); - return 0; -} - -int LTPResource::LoadPostaggerResource(const std::string& model_file) { - return LoadPostaggerResource(model_file.c_str()); -} - -int LTPResource::LoadPostaggerResource(const std::string& model_file, - const std::string& lexicon) { - return LoadPostaggerResource(model_file.c_str(), lexicon.c_str()); -} - -void LTPResource::ReleasePostaggerResource() { - if (!m_isPostaggerResourceLoaded) { return; } - postagger_release_postagger(m_postagger); - m_postagger = 0; - m_isPostaggerResourceLoaded = false; - INFO_LOG("postagger resource is released"); -} - -void * LTPResource::GetPostagger() { return m_postagger; } - -/* ======================================================== * - * NER related resource management * - * ======================================================== */ -int LTPResource::LoadNEResource(const char * model_file) { - if (m_isNEResourceLoaded) { - return 0; - } - - INFO_LOG("Loading NER resource from \"%s\"", model_file); - - m_ner = ner_create_recognizer(model_file); - - if (0 == m_ner) { - ERROR_LOG("Failed to load ner model"); - return -1; - } - - m_isNEResourceLoaded = true; - INFO_LOG("NER resource is loaded."); - return 0; -} - -int LTPResource::LoadNEResource(const std::string & model_file) { - return LoadNEResource(model_file.c_str()); -} - -void LTPResource::ReleaseNEResource() { - if (!m_isNEResourceLoaded) { - return; - } - - ner_release_recognizer(m_ner); - - m_ner = NULL; - m_isNEResourceLoaded = false; 
- INFO_LOG("NER resource is released"); -} - -void * LTPResource::GetNER() { - return m_ner; -} - -/* ====================================================== * - * Parser related resource * - * ====================================================== */ -int LTPResource::LoadParserResource(const char * model_file) { - if (m_isParserResourceLoaded) { - return 0; - } - - INFO_LOG("Loading parser resource from \"%s\"", model_file); - - m_parser = parser_create_parser(model_file); - if (!m_parser) { - ERROR_LOG("Failed to create parser"); - return -1; - } - - INFO_LOG("parser is loaded."); - - m_isParserResourceLoaded = true; - return 0; -} - -int LTPResource::LoadParserResource(const std::string & model_file) { - return LoadParserResource(model_file.c_str()); -} - -void LTPResource::ReleaseParserResource() { - if (!m_isParserResourceLoaded) { - return; - } - - parser_release_parser(m_parser); - INFO_LOG("Parser is released"); - - m_parser = NULL; - m_isParserResourceLoaded = false; -} - -void * LTPResource::GetParser() { - return m_parser; -} - -/* ======================================================== * - * SRL related resource management * - * ======================================================== */ -int LTPResource::LoadSRLResource(const char *data_folder) { - if (m_isSRLResourceLoaded) { - return 0; - } - - INFO_LOG("Loading SRL resource from \"%s\"", data_folder); - - if (0 != SRL_LoadResource(string(data_folder))) { - ERROR_LOG("Failed to load SRL resource."); - return -1; - } - - INFO_LOG("SRL resource is loaded."); - m_isSRLResourceLoaded = true; - return 0; -} - -int LTPResource::LoadSRLResource(const std::string & data_folder) { - return LoadSRLResource(data_folder.c_str()); -} - -void LTPResource::ReleaseSRLResource() { - if (!m_isSRLResourceLoaded) { - return; - } - - if (0 != SRL_ReleaseResource()) { - ERROR_LOG("Failed to release SRL resource"); - return; - } - - INFO_LOG("SRL is released"); - - m_isSRLResourceLoaded = false; - return; -} - +#include 
"LTPResource.h" +#include "xml4nlp/Xml4nlp.h" +#include "splitsnt/SplitSentence.h" +#include "segmentor/segment_dll.h" +#include "postagger/postag_dll.h" +#include "parser.n/parser_dll.h" +#include "ner/ner_dll.h" +#include "srl/SRL_DLL.h" +#include "utils/logging.hpp" + +#if _WIN32 +#pragma warning(disable: 4786 4284) +#pragma comment(lib, "segmentor.lib") +#pragma comment(lib, "postagger.lib") +#pragma comment(lib, "parser.lib") +#pragma comment(lib, "ner.lib") +#pragma comment(lib, "srl.lib") +#endif + +LTPResource::LTPResource() : + m_segmentor(NULL), + m_postagger(NULL), + m_ner(NULL), + m_parser(NULL), + m_isSegmentorResourceLoaded(false), + m_isPostaggerResourceLoaded(false), + m_isNEResourceLoaded(false), + m_isParserResourceLoaded(false), + m_isSRLResourceLoaded(false) { +} + + +LTPResource::~LTPResource() { + ReleaseSegmentorResource(); + ReleasePostaggerResource(); + ReleaseNEResource(); + ReleaseParserResource(); + ReleaseSRLResource(); +} + +/* ======================================================== * + * Segmentor related resource management * + * ======================================================== */ +int LTPResource::LoadSegmentorResource(const char * model_file) { + if (m_isSegmentorResourceLoaded) { return 0; } + + INFO_LOG("Loading segmentor model from \"%s\" ...", model_file); + m_segmentor = segmentor_create_segmentor(model_file); + if (0 == m_segmentor) { + ERROR_LOG("Failed to load segmentor model"); + return -1; + } + + m_isSegmentorResourceLoaded = true; + INFO_LOG("segmentor model is loaded."); + return 0; +} + +int LTPResource::LoadSegmentorResource(const char* model_file, const char* lexicon) { + if (m_isSegmentorResourceLoaded) { return 0; } + + INFO_LOG("Loading segmentor model from \"%s\", \"%s\" ...", model_file, lexicon); + m_segmentor = segmentor_create_segmentor(model_file, lexicon); + if (0 == m_segmentor) { + ERROR_LOG("Failed to load segmentor model"); + return -1; + } + + m_isSegmentorResourceLoaded = true; + 
INFO_LOG("segmentor model is loaded."); + return 0; +} + +int LTPResource::LoadSegmentorResource(const std::string& model_file) { + return LoadSegmentorResource(model_file.c_str()); +} + +int LTPResource::LoadSegmentorResource(const std::string& model_file, + const std::string& lexicon) { + return LoadSegmentorResource(model_file.c_str(), lexicon.c_str()); +} + +void LTPResource::ReleaseSegmentorResource() { + if (!m_isSegmentorResourceLoaded) { return; } + + segmentor_release_segmentor(m_segmentor); + INFO_LOG("segmentor model is released."); + m_segmentor = 0; + m_isSegmentorResourceLoaded = false; +} + +void* LTPResource::GetSegmentor() { return m_segmentor; } + +/* ======================================================== * + * Postagger related resource management * + * ======================================================== */ +int LTPResource::LoadPostaggerResource(const char * model_file) { + if (m_isPostaggerResourceLoaded) { return 0; } + + INFO_LOG("Loading postagger model from \"%s\" ...", model_file); + m_postagger = postagger_create_postagger(model_file); + if (0 == m_postagger) { + ERROR_LOG("Failed to load postagger model"); + return -1; + } + + m_isPostaggerResourceLoaded = true; + INFO_LOG("postagger model is loaded"); + return 0; +} + + +int LTPResource::LoadPostaggerResource(const char* model_file, const char* lexicon) { + if (m_isPostaggerResourceLoaded) { return 0; } + + INFO_LOG("Loading postagger model from \"%s\" ...", model_file); + m_postagger = postagger_create_postagger(model_file); + if (0 == m_postagger) { + ERROR_LOG("Failed to load postagger model"); + return -1; + } + + m_isPostaggerResourceLoaded = true; + INFO_LOG("postagger model is loaded"); + return 0; +} + +int LTPResource::LoadPostaggerResource(const std::string& model_file) { + return LoadPostaggerResource(model_file.c_str()); +} + +int LTPResource::LoadPostaggerResource(const std::string& model_file, + const std::string& lexicon) { + return 
LoadPostaggerResource(model_file.c_str(), lexicon.c_str()); +} + +void LTPResource::ReleasePostaggerResource() { + if (!m_isPostaggerResourceLoaded) { return; } + postagger_release_postagger(m_postagger); + m_postagger = 0; + m_isPostaggerResourceLoaded = false; + INFO_LOG("postagger resource is released"); +} + +void * LTPResource::GetPostagger() { return m_postagger; } + +/* ======================================================== * + * NER related resource management * + * ======================================================== */ +int LTPResource::LoadNEResource(const char * model_file) { + if (m_isNEResourceLoaded) { + return 0; + } + + INFO_LOG("Loading NER resource from \"%s\"", model_file); + + m_ner = ner_create_recognizer(model_file); + + if (0 == m_ner) { + ERROR_LOG("Failed to load ner model"); + return -1; + } + + m_isNEResourceLoaded = true; + INFO_LOG("NER resource is loaded."); + return 0; +} + +int LTPResource::LoadNEResource(const std::string & model_file) { + return LoadNEResource(model_file.c_str()); +} + +void LTPResource::ReleaseNEResource() { + if (!m_isNEResourceLoaded) { + return; + } + + ner_release_recognizer(m_ner); + + m_ner = NULL; + m_isNEResourceLoaded = false; + INFO_LOG("NER resource is released"); +} + +void * LTPResource::GetNER() { + return m_ner; +} + +/* ====================================================== * + * Parser related resource * + * ====================================================== */ +int LTPResource::LoadParserResource(const char * model_file) { + if (m_isParserResourceLoaded) { + return 0; + } + + INFO_LOG("Loading parser resource from \"%s\"", model_file); + + m_parser = parser_create_parser(model_file); + if (!m_parser) { + ERROR_LOG("Failed to create parser"); + return -1; + } + + INFO_LOG("parser is loaded."); + + m_isParserResourceLoaded = true; + return 0; +} + +int LTPResource::LoadParserResource(const std::string & model_file) { + return LoadParserResource(model_file.c_str()); +} + +void 
LTPResource::ReleaseParserResource() { + if (!m_isParserResourceLoaded) { + return; + } + + parser_release_parser(m_parser); + INFO_LOG("Parser is released"); + + m_parser = NULL; + m_isParserResourceLoaded = false; +} + +void * LTPResource::GetParser() { + return m_parser; +} + +/* ======================================================== * + * srl related resource management * + * ======================================================== */ +int LTPResource::LoadSRLResource(const char *data_folder) { + if (m_isSRLResourceLoaded) { + return 0; + } + + INFO_LOG("Loading srl resource from \"%s\"", data_folder); + + if (0 != srl_load_resource(string(data_folder))) { + ERROR_LOG("Failed to load srl resource."); + return -1; + } + + INFO_LOG("srl resource is loaded."); + m_isSRLResourceLoaded = true; + return 0; +} + +int LTPResource::LoadSRLResource(const std::string & data_folder) { + return LoadSRLResource(data_folder.c_str()); +} + +void LTPResource::ReleaseSRLResource() { + if (!m_isSRLResourceLoaded) { + return; + } + + if (0 != srl_release_resource()) { + ERROR_LOG("Failed to release srl resource"); + return; + } + + INFO_LOG("srl is released"); + + m_isSRLResourceLoaded = false; + return; +} diff --git a/src/ltp/LTPResource.h b/src/ltp/LTPResource.h index 880ffd15f..ff821c063 100644 --- a/src/ltp/LTPResource.h +++ b/src/ltp/LTPResource.h @@ -1,88 +1,116 @@ -#ifndef __LTP_RESOURCE_H__ -#define __LTP_RESOURCE_H__ - -#include - -class LTPResource { -public: - LTPResource(); - ~LTPResource(); - - /** - * Load segmentor resource from model file. Return 0 on success, - * otherwise -1. 
- * - * @param[in] model_file the model_file - * @return int 0 on success, otherwise -1 - */ - int LoadSegmentorResource(const char* model_file); - int LoadSegmentorResource(const char* model_file, const char* lexicon); - int LoadSegmentorResource(const std::string& model_file); - int LoadSegmentorResource(const std::string& model_file, const std::string& lexicon); - - /** - * load postagger resource from model file. Return 0 on success, - * otherwise -1. - * - * @param[in] model_file - * @return int 0 on success, otherwise -1 - */ - int LoadPostaggerResource(const char* model_file); - int LoadPostaggerResource(const char* model_file, const char* lexicon); - int LoadPostaggerResource(const std::string& model_file); - int LoadPostaggerResource(const std::string& model_file, const std::string& lexicon); - - /** - * load parser resource from model file. Return 0 on success, - * otherwise -1. - * - * @param[in] model_file - * @return int 0 on success, otherwise -1 - */ - int LoadNEResource(const char * model_file); - int LoadNEResource(const std::string & model_file); - - /** - * load parser resource from model file. Return 0 on success, - * otherwise -1. - * - * @param[in] model_file - * @return int 0 on success, otherwise -1 - */ - int LoadParserResource(const char* model_file); - int LoadParserResource(const std::string& model_file); - - /** - * load srl resource from model file. Return 0 on success, - * otherwise -1. - * - * @param[in] model_file - * @return int 0 on success, otherwise -1 - */ - int LoadSRLResource(const char* data_folder); - int LoadSRLResource(const std::string& data_folder); - - void ReleaseSegmentorResource(void); - void ReleasePostaggerResource(void); - void ReleaseNEResource(void); - void ReleaseParserResource(void); - void ReleaseSRLResource(void); - - void* GetSegmentor(); // access the segmentor. - void* GetPostagger(); // access the postagger. - void* GetParser(); // access the parser. - void* GetNER(); // access the ner. 
-private: - void* m_segmentor; - void* m_postagger; - void* m_parser; - void* m_ner; -private: - bool m_isSegmentorResourceLoaded; - bool m_isPostaggerResourceLoaded; - bool m_isNEResourceLoaded; - bool m_isParserResourceLoaded; - bool m_isSRLResourceLoaded; -}; - -#endif // end for __LTP_RESOURCE_H__ +#ifndef __LTP_RESOURCE_H__ +#define __LTP_RESOURCE_H__ + +#include + +class LTPResource { +public: + LTPResource(); + ~LTPResource(); + + /** + * Load segmentor resource from model file. Return 0 on success, + * otherwise -1. + * + * @param[in] model_file the model_file + * @return int 0 on success, otherwise -1 + */ + int LoadSegmentorResource(const char* model_file); + int LoadSegmentorResource(const char* model_file, const char* lexicon); + int LoadSegmentorResource(const std::string& model_file); + int LoadSegmentorResource(const std::string& model_file, const std::string& lexicon); + + /** + * load postagger resource from model file. Return 0 on success, + * otherwise -1. + * + * @param[in] model_file + * @return int 0 on success, otherwise -1 + */ + int LoadPostaggerResource(const char* model_file); + int LoadPostaggerResource(const char* model_file, const char* lexicon); + int LoadPostaggerResource(const std::string& model_file); + int LoadPostaggerResource(const std::string& model_file, const std::string& lexicon); + + /** + * load parser resource from model file. Return 0 on success, + * otherwise -1. + * + * @param[in] model_file + * @return int 0 on success, otherwise -1 + */ + int LoadNEResource(const char * model_file); + int LoadNEResource(const std::string & model_file); + + /** + * load parser resource from model file. Return 0 on success, + * otherwise -1. + * + * @param[in] model_file + * @return int 0 on success, otherwise -1 + */ + int LoadParserResource(const char* model_file); + int LoadParserResource(const std::string& model_file); + + /** + * load semantic parser resource from model file. Return 0 on success, + * otherwise -1. 
+ * + * @param[in] model_file + * @return int 0 on success, otherwise -1 + */ + int LoadSemanticParserResource(const char* model_file); + int LoadSemanticParserResource(const std::string& model_file); + + /** + * load lstm semantic parser resource from model file. Return 0 on success, + * otherwise -1. + * + * @param[in] model_file + * @return int 0 on success, otherwise -1 + */ + int LoadLSTMSemanticParserResource(const char* data_dir); + int LoadLSTMSemanticParserResource(const std::string& data_dir); + + /** + * load srl resource from model file. Return 0 on success, + * otherwise -1. + * + * @param[in] model_file + * @return int 0 on success, otherwise -1 + */ + int LoadSRLResource(const char* data_folder); + int LoadSRLResource(const std::string& data_folder); + + void ReleaseSegmentorResource(void); + void ReleasePostaggerResource(void); + void ReleaseNEResource(void); + void ReleaseParserResource(void); + void ReleaseSemanticParserResource(void); + void ReleaseLSTMSemanticParserResource(void); + void ReleaseSRLResource(void); + + void* GetSegmentor(); // access the segmentor. + void* GetPostagger(); // access the postagger. + void* GetParser(); // access the parser. + void* GetSemanticParser(); // access the semanticparser. + void* GetLSTMSemanticParser(); // access the semanticparser. + void* GetNER(); // access the ner. 
+private: + void* m_segmentor; + void* m_postagger; + void* m_parser; + void* m_semanticparser; + void* m_lstmsemanticparser; + void* m_ner; +private: + bool m_isSegmentorResourceLoaded; + bool m_isPostaggerResourceLoaded; + bool m_isNEResourceLoaded; + bool m_isParserResourceLoaded; + bool m_isSemanticParserResourceLoaded; + bool m_isLSTMSemanticParserResourceLoaded; + bool m_isSRLResourceLoaded; +}; + +#endif // end for __LTP_RESOURCE_H__ \ No newline at end of file diff --git a/src/ltp/Ltp.cpp b/src/ltp/Ltp.cpp index 66a77e699..c179476f6 100644 --- a/src/ltp/Ltp.cpp +++ b/src/ltp/Ltp.cpp @@ -1,484 +1,485 @@ -#include "Ltp.h" -#include -#include -#include - -#include "xml4nlp/Xml4nlp.h" -#include "splitsnt/SplitSentence.h" -#include "segmentor/segment_dll.h" -#include "postagger/postag_dll.h" -#include "parser.n/parser_dll.h" -#include "ner/ner_dll.h" -#include "srl/SRL_DLL.h" -#include "utils/codecs.hpp" -#include "utils/logging.hpp" - -#if _WIN32 -#pragma warning(disable: 4786 4284) -#pragma comment(lib, "segmentor.lib") -#pragma comment(lib, "postagger.lib") -#pragma comment(lib, "parser.lib") -#pragma comment(lib, "ner.lib") -#pragma comment(lib, "srl.lib") -#endif - -// create a platform -LTP::LTP(const std::string& last_stage, - const std::string& segmentor_model_file, - const std::string& segmentor_lexicon_file, - const std::string& postagger_model_file, - const std::string& postagger_lexicon_file, - const std::string& ner_model_file, - const std::string& parser_model_file, - const std::string& srl_model_dir) - : _resource(), _loaded(false) { - _loaded = load(last_stage, - segmentor_model_file, segmentor_lexicon_file, - postagger_model_file, postagger_lexicon_file, - ner_model_file, - parser_model_file, - srl_model_dir); -} - -bool LTP::load(const std::string& last_stage, - const std::string& segmentor_model_file, - const std::string& segmentor_lexicon_file, - const std::string& postagger_model_file, - const std::string& postagger_lexicon_file, - const 
std::string& ner_model_file, - const std::string& parser_model_file, - const std::string& srl_model_dir) { - - size_t target_mask = 0; - if (last_stage == LTP_SERVICE_NAME_SEGMENT) { - target_mask = kActiveSegmentor; - } else if (last_stage == LTP_SERVICE_NAME_POSTAG) { - target_mask = (kActiveSegmentor|kActivePostagger); - } else if (last_stage == LTP_SERVICE_NAME_NER) { - target_mask = (kActiveSegmentor|kActivePostagger|kActiveNER); - } else if (last_stage == LTP_SERVICE_NAME_DEPPARSE) { - target_mask = (kActiveSegmentor|kActivePostagger|kActiveParser); - } else if ((last_stage == LTP_SERVICE_NAME_SRL) || (last_stage == LTP_SERVICE_NAME_ALL)) { - target_mask = - (kActiveSegmentor|kActivePostagger|kActiveNER|kActiveParser|kActiveSRL); - } - - size_t loaded_mask = 0; - - if (target_mask & kActiveSegmentor) { - int ret; - if (segmentor_lexicon_file == "") { - ret = _resource.LoadSegmentorResource(segmentor_model_file); - } else { - ret = _resource.LoadSegmentorResource(segmentor_model_file, segmentor_lexicon_file); - } - if (0 != ret) { - ERROR_LOG("in LTP::wordseg, failed to load segmentor resource"); - return false; - } - loaded_mask |= kActiveSegmentor; - } - - if (target_mask & kActivePostagger) { - int ret; - if (postagger_lexicon_file == "") { - ret = _resource.LoadPostaggerResource(postagger_model_file); - } else { - ret = _resource.LoadPostaggerResource(postagger_model_file, postagger_lexicon_file); - } - if (0 != ret) { - ERROR_LOG("in LTP::wordseg, failed to load postagger resource"); - return false; - } - loaded_mask |= kActivePostagger; - } - - if (target_mask & kActiveNER) { - if (0 != _resource.LoadNEResource(ner_model_file)) { - ERROR_LOG("in LTP::ner, failed to load ner resource"); - return false; - } - loaded_mask |= kActiveNER; - } - - if (target_mask & kActiveParser) { - if (0 != _resource.LoadParserResource(parser_model_file)) { - ERROR_LOG("in LTP::parser, failed to load parser resource"); - return false; - } - loaded_mask |= kActiveParser; - } 
- - if (target_mask & kActiveSRL) { - if ( 0 != _resource.LoadSRLResource(srl_model_dir)) { - ERROR_LOG("in LTP::srl, failed to load srl resource"); - return false; - } - loaded_mask |= kActiveSRL; - } - - if ((loaded_mask & target_mask) != target_mask) { - ERROR_LOG("target is config but resource not loaded."); - return false; - } - - INFO_LOG("Resources loading finished."); - - return true; -} - - -LTP::~LTP() {} - -bool LTP::loaded() const { return _loaded; } - -// If you do NOT split sentence explicitly, -// this will be called according to dependencies among modules -int LTP::splitSentence_dummy(XML4NLP & xml) { - if ( xml.QueryNote(NOTE_SENT) ) { - return 0; - } - - int paraNum = xml.CountParagraphInDocument(); - - if (paraNum == 0) { - ERROR_LOG("in LTP::splitsent, There is no paragraph in doc,"); - ERROR_LOG("you may have loaded a blank file or have not loaded a file yet."); - return kEmptyStringError; - } - - for (int i = 0; i < paraNum; ++i) { - vector vecSentences; - string para; - xml.GetParagraph(i, para); - - if (0 == SplitSentence( para, vecSentences )) { - ERROR_LOG("in LTP::splitsent, failed to split sentence"); - return kSplitSentenceError; - } - - // dummy - // vecSentences.push_back(para); - if (0 != xml.SetSentencesToParagraph(vecSentences, i)) { - ERROR_LOG("in LTP::splitsent, failed to write sentence to xml"); - return kWriteXmlError; - } - } - - xml.SetNote(NOTE_SENT); - return 0; -} - -// integrate word segmentor into LTP -int LTP::wordseg(XML4NLP & xml) { - if (xml.QueryNote(NOTE_WORD)) { - return 0; - } - - // - int ret = splitSentence_dummy(xml); - if (0 != ret) { - ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess."); - return ret; - } - - // get the segmentor pointer - void * segmentor = _resource.GetSegmentor(); - if (0 == segmentor) { - ERROR_LOG("in LTP::wordseg, failed to init a segmentor"); - return kWordsegError; - } - - int stnsNum = xml.CountSentenceInDocument(); - - if (0 == stnsNum) { - ERROR_LOG("in 
LTP::wordseg, number of sentence equals 0"); - return kEmptyStringError; - } - - for (int i = 0; i < stnsNum; ++ i) { - std::string strStn = xml.GetSentence(i); - std::vector vctWords; - - if (ltp::strutils::codecs::length(strStn) > MAX_SENTENCE_LEN) { - ERROR_LOG("in LTP::wordseg, input sentence is too long"); - return kSentenceTooLongError; - } - - if (0 == segmentor_segment(segmentor, strStn, vctWords)) { - ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"", - strStn.c_str()); - return kWordsegError; - } - - if (0 != xml.SetWordsToSentence(vctWords, i)) { - ERROR_LOG("in LTP::wordseg, failed to write segment result to xml"); - return kWriteXmlError; - } - } - - xml.SetNote(NOTE_WORD); - return 0; -} - -// integrate postagger into LTP -int LTP::postag(XML4NLP & xml) { - if ( xml.QueryNote(NOTE_POS) ) { - return 0; - } - - // dependency - int ret = wordseg(xml); - if (0 != ret) { - ERROR_LOG("in LTP::postag, failed to perform word segment preprocess"); - return ret; - } - - void * postagger = _resource.GetPostagger(); - if (0 == postagger) { - ERROR_LOG("in LTP::postag, failed to init a postagger"); - return kPostagError; - } - - int stnsNum = xml.CountSentenceInDocument(); - - if (0 == stnsNum) { - ERROR_LOG("in LTP::postag, number of sentence equals 0"); - return kEmptyStringError; - } - - for (int i = 0; i < stnsNum; ++i) { - vector vecWord; - vector vecPOS; - - xml.GetWordsFromSentence(vecWord, i); - - if (0 == vecWord.size()) { - ERROR_LOG("Input sentence is empty."); - return kEmptyStringError; - } - - if (vecWord.size() > MAX_WORDS_NUM) { - ERROR_LOG("Input sentence is too long."); - return kSentenceTooLongError; - } - - if (0 == postagger_postag(postagger, vecWord, vecPOS)) { - ERROR_LOG("in LTP::postag, failed to perform postag on sent. 
#%d", i+1); - return kPostagError; - } - - if (xml.SetPOSsToSentence(vecPOS, i) != 0) { - ERROR_LOG("in LTP::postag, failed to write postag result to xml"); - return kWriteXmlError; - } - } - - xml.SetNote(NOTE_POS); - - return 0; -} - -// perform ner over xml -int LTP::ner(XML4NLP & xml) { - if ( xml.QueryNote(NOTE_NE) ) { - return 0; - } - - // dependency - int ret = postag(xml); - if (0 != ret) { - ERROR_LOG("in LTP::ner, failed to perform postag preprocess"); - return ret; - } - - void * ner = _resource.GetNER(); - - if (NULL == ner) { - ERROR_LOG("in LTP::ner, failed to init a ner."); - return kNERError; - } - - int stnsNum = xml.CountSentenceInDocument(); - - if (stnsNum == 0) { - ERROR_LOG("in LTP::ner, number of sentence equals 0"); - return kEmptyStringError; - } - - for (int i = 0; i < stnsNum; ++ i) { - vector vecWord; - vector vecPOS; - vector vecNETag; - - if (xml.GetWordsFromSentence(vecWord, i) != 0) { - ERROR_LOG("in LTP::ner, failed to get words from xml"); - return kReadXmlError; - } - - if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { - ERROR_LOG("in LTP::ner, failed to get postags from xml"); - return kNERError; - } - - if (0 == vecWord.size()) { - ERROR_LOG("Input sentence is empty."); - return kEmptyStringError; - } - - if (vecWord.size() > MAX_WORDS_NUM) { - ERROR_LOG("Input sentence is too long."); - return kSentenceTooLongError; - } - - if (0 == ner_recognize(ner, vecWord, vecPOS, vecNETag)) { - ERROR_LOG("in LTP::ner, failed to perform ner on sent. 
#%d", i+1); - return kNERError; - } - - xml.SetNEsToSentence(vecNETag, i); - } - - xml.SetNote(NOTE_NE); - return 0; -} - -int LTP::parser(XML4NLP & xml) { - if ( xml.QueryNote(NOTE_PARSER) ) return 0; - - int ret = postag(xml); - if (0 != ret) { - ERROR_LOG("in LTP::parser, failed to perform postag preprocessing"); - return ret; - } - - void * parser = _resource.GetParser(); - - if (parser == NULL) { - ERROR_LOG("in LTP::parser, failed to init a parser"); - return kParserError; - } - - int stnsNum = xml.CountSentenceInDocument(); - if (stnsNum == 0) { - ERROR_LOG("in LTP::parser, number of sentences equals 0"); - return kEmptyStringError; - } - - for (int i = 0; i < stnsNum; ++i) { - std::vector vecWord; - std::vector vecPOS; - std::vector vecHead; - std::vector vecRel; - - if (xml.GetWordsFromSentence(vecWord, i) != 0) { - ERROR_LOG("in LTP::parser, failed to get words from xml"); - return kReadXmlError; - } - - if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { - ERROR_LOG("in LTP::parser, failed to get postags from xml"); - return kReadXmlError; - } - - if (0 == vecWord.size()) { - ERROR_LOG("Input sentence is empty."); - return kEmptyStringError; - } - - if (vecWord.size() > MAX_WORDS_NUM) { - ERROR_LOG("Input sentence is too long."); - return kSentenceTooLongError; - } - - if (-1 == parser_parse(parser, vecWord, vecPOS, vecHead, vecRel)) { - ERROR_LOG("in LTP::parser, failed to perform parse on sent. 
#%d", i+1); - return kParserError; - } - - if (0 != xml.SetParsesToSentence(vecHead, vecRel, i)) { - ERROR_LOG("in LTP::parser, failed to write parse result to xml"); - return kWriteXmlError; - } - } - - xml.SetNote(NOTE_PARSER); - - return 0; -} - -int LTP::srl(XML4NLP & xml) { - if ( xml.QueryNote(NOTE_SRL) ) return 0; - - // dependency - int ret = ner(xml); - if (0 != ret) { - ERROR_LOG("in LTP::srl, failed to perform ner preprocess"); - return ret; - } - - ret = parser(xml); - if (0 != ret) { - ERROR_LOG("in LTP::srl, failed to perform parsing preprocess"); - return ret; - } - - int stnsNum = xml.CountSentenceInDocument(); - if (stnsNum == 0) { - ERROR_LOG("in LTP::srl, number of sentence equals 0"); - return kEmptyStringError; - } - - for (int i = 0; i < stnsNum; ++i) { - vector vecWord; - vector vecPOS; - vector vecNE; - vector< pair > vecParse; - vector< pair > > > > vecSRLResult; - - if (xml.GetWordsFromSentence(vecWord, i) != 0) { - ERROR_LOG("in LTP::ner, failed to get words from xml"); - return kReadXmlError; - } - - if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { - ERROR_LOG("in LTP::ner, failed to get postags from xml"); - return kReadXmlError; - } - - if (xml.GetNEsFromSentence(vecNE, i) != 0) { - ERROR_LOG("in LTP::ner, failed to get ner result from xml"); - return kReadXmlError; - } - - if (xml.GetParsesFromSentence(vecParse, i) != 0) { - ERROR_LOG("in LTP::ner, failed to get parsing result from xml"); - return kReadXmlError; - } - - if (0 != SRL(vecWord, vecPOS, vecNE, vecParse, vecSRLResult)) { - ERROR_LOG("in LTP::srl, failed to perform srl on sent. 
#%d", i+1); - return kSRLError; - } - - int j = 0; - for (; j < vecSRLResult.size(); ++j) { - vector vecType; - vector< pair > vecBegEnd; - int k = 0; - - for (; k < vecSRLResult[j].second.size(); ++k) { - vecType.push_back(vecSRLResult[j].second[k].first); - vecBegEnd.push_back(vecSRLResult[j].second[k].second); - } - - if (0 != xml.SetPredArgToWord(i, vecSRLResult[j].first, vecType, vecBegEnd)) { - return kWriteXmlError; - } - } - } - - xml.SetNote(NOTE_SRL); - return 0; -} - +#include "Ltp.h" +#include +#include +#include + +#include "xml4nlp/Xml4nlp.h" +#include "splitsnt/SplitSentence.h" +#include "segmentor/segment_dll.h" +#include "postagger/postag_dll.h" +#include "parser.n/parser_dll.h" +#include "ner/ner_dll.h" +#include "srl/SRL_DLL.h" +#include "utils/codecs.hpp" +#include "utils/logging.hpp" + +#if _WIN32 +#pragma warning(disable: 4786 4284) +#pragma comment(lib, "segmentor.lib") +#pragma comment(lib, "postagger.lib") +#pragma comment(lib, "parser.lib") +#pragma comment(lib, "ner.lib") +#pragma comment(lib, "srl.lib") +#endif + +// create a platform +LTP::LTP(const std::string& last_stage, + const std::string& segmentor_model_file, + const std::string& segmentor_lexicon_file, + const std::string& postagger_model_file, + const std::string& postagger_lexicon_file, + const std::string& ner_model_file, + const std::string& parser_model_file, + const std::string& srl_model_dir) + : _resource(), _loaded(false) { + _loaded = load(last_stage, + segmentor_model_file, segmentor_lexicon_file, + postagger_model_file, postagger_lexicon_file, + ner_model_file, + parser_model_file, + srl_model_dir); +} + +bool LTP::load(const std::string& last_stage, + const std::string& segmentor_model_file, + const std::string& segmentor_lexicon_file, + const std::string& postagger_model_file, + const std::string& postagger_lexicon_file, + const std::string& ner_model_file, + const std::string& parser_model_file, + const std::string& srl_model_file) { + + size_t target_mask = 0; + 
if (last_stage == LTP_SERVICE_NAME_SEGMENT) { + target_mask = kActiveSegmentor; + } else if (last_stage == LTP_SERVICE_NAME_POSTAG) { + target_mask = (kActiveSegmentor|kActivePostagger); + } else if (last_stage == LTP_SERVICE_NAME_NER) { + target_mask = (kActiveSegmentor|kActivePostagger|kActiveNER); + } else if (last_stage == LTP_SERVICE_NAME_DEPPARSE) { + target_mask = (kActiveSegmentor|kActivePostagger|kActiveParser); + } else if (last_stage == LTP_SERVICE_NAME_SRL) { + target_mask = (kActiveSegmentor|kActivePostagger|kActiveParser|kActiveSRL); + } else if (last_stage == "all") { + target_mask = + (kActiveSegmentor|kActivePostagger|kActiveNER|kActiveParser|kActiveSRL); + } + + size_t loaded_mask = 0; + + if (target_mask & kActiveSegmentor) { + int ret; + if (segmentor_lexicon_file == "") { + ret = _resource.LoadSegmentorResource(segmentor_model_file); + } else { + ret = _resource.LoadSegmentorResource(segmentor_model_file, segmentor_lexicon_file); + } + if (0 != ret) { + ERROR_LOG("in LTP::wordseg, failed to load segmentor resource"); + return false; + } + loaded_mask |= kActiveSegmentor; + } + + if (target_mask & kActivePostagger) { + int ret; + if (postagger_lexicon_file == "") { + ret = _resource.LoadPostaggerResource(postagger_model_file); + } else { + ret = _resource.LoadPostaggerResource(postagger_model_file, postagger_lexicon_file); + } + if (0 != ret) { + ERROR_LOG("in LTP::wordseg, failed to load postagger resource"); + return false; + } + loaded_mask |= kActivePostagger; + } + + if (target_mask & kActiveNER) { + if (0 != _resource.LoadNEResource(ner_model_file)) { + ERROR_LOG("in LTP::ner, failed to load ner resource"); + return false; + } + loaded_mask |= kActiveNER; + } + + if (target_mask & kActiveParser) { + if (0 != _resource.LoadParserResource(parser_model_file)) { + ERROR_LOG("in LTP::parser, failed to load parser resource"); + return false; + } + loaded_mask |= kActiveParser; + } + + if (target_mask & kActiveSRL) { + if ( 0 != 
_resource.LoadSRLResource(srl_model_file)) { + ERROR_LOG("in LTP::srl, failed to load srl resource"); + return false; + } + loaded_mask |= kActiveSRL; + } + + if ((loaded_mask & target_mask) != target_mask) { + ERROR_LOG("target is config but resource not loaded."); + return false; + } + + INFO_LOG("Resources loading finished."); + + return true; +} + + +LTP::~LTP() {} + +bool LTP::loaded() const { return _loaded; } + +// If you do NOT split sentence explicitly, +// this will be called according to dependencies among modules +int LTP::splitSentence_dummy(XML4NLP & xml) { + if ( xml.QueryNote(NOTE_SENT) ) { + return 0; + } + + int paraNum = xml.CountParagraphInDocument(); + + if (paraNum == 0) { + ERROR_LOG("in LTP::splitsent, There is no paragraph in doc,"); + ERROR_LOG("you may have loaded a blank file or have not loaded a file yet."); + return kEmptyStringError; + } + + for (int i = 0; i < paraNum; ++i) { + vector vecSentences; + string para; + xml.GetParagraph(i, para); + + if (0 == SplitSentence( para, vecSentences )) { + ERROR_LOG("in LTP::splitsent, failed to split sentence"); + return kSplitSentenceError; + } + + // dummy + // vecSentences.push_back(para); + if (0 != xml.SetSentencesToParagraph(vecSentences, i)) { + ERROR_LOG("in LTP::splitsent, failed to write sentence to xml"); + return kWriteXmlError; + } + } + + xml.SetNote(NOTE_SENT); + return 0; +} + +// integrate word segmentor into LTP +int LTP::wordseg(XML4NLP & xml) { + if (xml.QueryNote(NOTE_WORD)) { + return 0; + } + + // + int ret = splitSentence_dummy(xml); + if (0 != ret) { + ERROR_LOG("in LTP::wordseg, failed to perform split sentence preprocess."); + return ret; + } + + // get the segmentor pointer + void * segmentor = _resource.GetSegmentor(); + if (0 == segmentor) { + ERROR_LOG("in LTP::wordseg, failed to init a segmentor"); + return kWordsegError; + } + + int stnsNum = xml.CountSentenceInDocument(); + + if (0 == stnsNum) { + ERROR_LOG("in LTP::wordseg, number of sentence equals 0"); + 
return kEmptyStringError; + } + + for (int i = 0; i < stnsNum; ++ i) { + std::string strStn = xml.GetSentence(i); + std::vector vctWords; + + if (ltp::strutils::codecs::length(strStn) > MAX_SENTENCE_LEN) { + ERROR_LOG("in LTP::wordseg, input sentence is too long"); + return kSentenceTooLongError; + } + + if (0 == segmentor_segment(segmentor, strStn, vctWords)) { + ERROR_LOG("in LTP::wordseg, failed to perform word segment on \"%s\"", + strStn.c_str()); + return kWordsegError; + } + + if (0 != xml.SetWordsToSentence(vctWords, i)) { + ERROR_LOG("in LTP::wordseg, failed to write segment result to xml"); + return kWriteXmlError; + } + } + + xml.SetNote(NOTE_WORD); + return 0; +} + +// integrate postagger into LTP +int LTP::postag(XML4NLP & xml) { + if ( xml.QueryNote(NOTE_POS) ) { + return 0; + } + + // dependency + int ret = wordseg(xml); + if (0 != ret) { + ERROR_LOG("in LTP::postag, failed to perform word segment preprocess"); + return ret; + } + + void * postagger = _resource.GetPostagger(); + if (0 == postagger) { + ERROR_LOG("in LTP::postag, failed to init a postagger"); + return kPostagError; + } + + int stnsNum = xml.CountSentenceInDocument(); + + if (0 == stnsNum) { + ERROR_LOG("in LTP::postag, number of sentence equals 0"); + return kEmptyStringError; + } + + for (int i = 0; i < stnsNum; ++i) { + vector vecWord; + vector vecPOS; + + xml.GetWordsFromSentence(vecWord, i); + + if (0 == vecWord.size()) { + ERROR_LOG("Input sentence is empty."); + return kEmptyStringError; + } + + if (vecWord.size() > MAX_WORDS_NUM) { + ERROR_LOG("Input sentence is too long."); + return kSentenceTooLongError; + } + + if (0 == postagger_postag(postagger, vecWord, vecPOS)) { + ERROR_LOG("in LTP::postag, failed to perform postag on sent. 
#%d", i+1); + return kPostagError; + } + + if (xml.SetPOSsToSentence(vecPOS, i) != 0) { + ERROR_LOG("in LTP::postag, failed to write postag result to xml"); + return kWriteXmlError; + } + } + + xml.SetNote(NOTE_POS); + + return 0; +} + +// perform ner over xml +int LTP::ner(XML4NLP & xml) { + if ( xml.QueryNote(NOTE_NE) ) { + return 0; + } + + // dependency + int ret = postag(xml); + if (0 != ret) { + ERROR_LOG("in LTP::ner, failed to perform postag preprocess"); + return ret; + } + + void * ner = _resource.GetNER(); + + if (NULL == ner) { + ERROR_LOG("in LTP::ner, failed to init a ner."); + return kNERError; + } + + int stnsNum = xml.CountSentenceInDocument(); + + if (stnsNum == 0) { + ERROR_LOG("in LTP::ner, number of sentence equals 0"); + return kEmptyStringError; + } + + for (int i = 0; i < stnsNum; ++ i) { + vector vecWord; + vector vecPOS; + vector vecNETag; + + if (xml.GetWordsFromSentence(vecWord, i) != 0) { + ERROR_LOG("in LTP::ner, failed to get words from xml"); + return kReadXmlError; + } + + if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { + ERROR_LOG("in LTP::ner, failed to get postags from xml"); + return kNERError; + } + + if (0 == vecWord.size()) { + ERROR_LOG("Input sentence is empty."); + return kEmptyStringError; + } + + if (vecWord.size() > MAX_WORDS_NUM) { + ERROR_LOG("Input sentence is too long."); + return kSentenceTooLongError; + } + + if (0 == ner_recognize(ner, vecWord, vecPOS, vecNETag)) { + ERROR_LOG("in LTP::ner, failed to perform ner on sent. 
#%d", i+1); + return kNERError; + } + + xml.SetNEsToSentence(vecNETag, i); + } + + xml.SetNote(NOTE_NE); + return 0; +} + +int LTP::parser(XML4NLP & xml) { + if ( xml.QueryNote(NOTE_PARSER) ) return 0; + + int ret = postag(xml); + if (0 != ret) { + ERROR_LOG("in LTP::parser, failed to perform postag preprocessing"); + return ret; + } + + void * parser = _resource.GetParser(); + + if (parser == NULL) { + ERROR_LOG("in LTP::parser, failed to init a parser"); + return kParserError; + } + + int stnsNum = xml.CountSentenceInDocument(); + if (stnsNum == 0) { + ERROR_LOG("in LTP::parser, number of sentences equals 0"); + return kEmptyStringError; + } + + for (int i = 0; i < stnsNum; ++i) { + std::vector vecWord; + std::vector vecPOS; + std::vector vecHead; + std::vector vecRel; + + if (xml.GetWordsFromSentence(vecWord, i) != 0) { + ERROR_LOG("in LTP::parser, failed to get words from xml"); + return kReadXmlError; + } + + if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { + ERROR_LOG("in LTP::parser, failed to get postags from xml"); + return kReadXmlError; + } + + if (0 == vecWord.size()) { + ERROR_LOG("Input sentence is empty."); + return kEmptyStringError; + } + + if (vecWord.size() > MAX_WORDS_NUM) { + ERROR_LOG("Input sentence is too long."); + return kSentenceTooLongError; + } + + if (-1 == parser_parse(parser, vecWord, vecPOS, vecHead, vecRel)) { + ERROR_LOG("in LTP::parser, failed to perform parse on sent. 
#%d", i+1); + return kParserError; + } + + if (0 != xml.SetParsesToSentence(vecHead, vecRel, i)) { + ERROR_LOG("in LTP::parser, failed to write parse result to xml"); + return kWriteXmlError; + } + } + + xml.SetNote(NOTE_PARSER); + + return 0; +} + +int LTP::srl(XML4NLP & xml) { + if ( xml.QueryNote(NOTE_SRL) ) return 0; + + // dependency + int ret = ner(xml); + if (0 != ret) { + ERROR_LOG("in LTP::srl, failed to perform ner preprocess"); + return ret; + } + + ret = parser(xml); + if (0 != ret) { + ERROR_LOG("in LTP::srl, failed to perform parsing preprocess"); + return ret; + } + + int stnsNum = xml.CountSentenceInDocument(); + if (stnsNum == 0) { + ERROR_LOG("in LTP::srl, number of sentence equals 0"); + return kEmptyStringError; + } + + for (int i = 0; i < stnsNum; ++i) { + vector vecWord; + vector vecPOS; + vector vecNE; + vector< pair > vecParse; + vector< pair > > > > vecSRLResult; + + if (xml.GetWordsFromSentence(vecWord, i) != 0) { + ERROR_LOG("in LTP::ner, failed to get words from xml"); + return kReadXmlError; + } + + if (xml.GetPOSsFromSentence(vecPOS, i) != 0) { + ERROR_LOG("in LTP::ner, failed to get postags from xml"); + return kReadXmlError; + } + + if (xml.GetNEsFromSentence(vecNE, i) != 0) { + ERROR_LOG("in LTP::ner, failed to get ner result from xml"); + return kReadXmlError; + } + + if (xml.GetParsesFromSentence(vecParse, i) != 0) { + ERROR_LOG("in LTP::ner, failed to get parsing result from xml"); + return kReadXmlError; + } + + if (0 != srl_dosrl(vecWord, vecPOS, vecParse, vecSRLResult)) { + ERROR_LOG("in LTP::srl, failed to perform srl on sent. 
#%d", i+1); + return kSRLError; + } + + int j = 0; + for (; j < vecSRLResult.size(); ++j) { + vector vecType; + vector< pair > vecBegEnd; + int k = 0; + + for (; k < vecSRLResult[j].second.size(); ++k) { + vecType.push_back(vecSRLResult[j].second[k].first); + vecBegEnd.push_back(vecSRLResult[j].second[k].second); + } + + if (0 != xml.SetPredArgToWord(i, vecSRLResult[j].first, vecType, vecBegEnd)) { + return kWriteXmlError; + } + } + } + + xml.SetNote(NOTE_SRL); + return 0; +} diff --git a/src/ltp/Ltp.h b/src/ltp/Ltp.h index e448f95a1..48bc62b3f 100644 --- a/src/ltp/Ltp.h +++ b/src/ltp/Ltp.h @@ -1,137 +1,132 @@ -#ifndef __LTP_H__ -#define __LTP_H__ - -#include "LTPResource.h" -#include "xml4nlp/Xml4nlp.h" -#include -#include -#include -#include -#include -#include -#include - -#define MAX_SENTENCE_LEN 1024 -#define MAX_WORDS_NUM 256 - -#define LTP_SERVICE_NAME_SPLITSENT "sp" -#define LTP_SERVICE_NAME_SEGMENT "ws" -#define LTP_SERVICE_NAME_POSTAG "pos" -#define LTP_SERVICE_NAME_NER "ner" -#define LTP_SERVICE_NAME_DEPPARSE "dp" -#define LTP_SERVICE_NAME_SRL "srl" -#define LTP_SERVICE_NAME_ALL "all" -#define LTP_SERVICE_NAME_DEFAULT LTP_SERVICE_NAME_ALL - -#define LTP_SERVICE_OUTPUT_FORMAT_XML "xml" -#define LTP_SERVICE_OUTPUT_FORMAT_JSON "json" -#define LTP_SERVICE_OUTPUT_FORMAT_DEFAULT LTP_SERVICE_OUTPUT_FORMAT_XML - -enum ErrorCodes { - kEmptyStringError = 1, /*< The input sentence is empty */ - kSplitSentenceError, /*< Failed to perform split sentence */ - kWordsegError, /*< Failed to perform wordseg */ - kPostagError, /*< Failed to perform postag */ - kParserError, /*< Failed to perform parsing */ - kNERError, /*< Failed to perform NER */ - kSRLError, /*< Failed to perform SRL */ - kEncodingError, /*< Sentence encoding not in UTF-8 */ - kXmlParseError, /*< Input xml is not well formatted */ - kSentenceTooLongError, /*< More than 300 characters or 70 words */ - kReadXmlError, /*< Failed to read XML in internal process */ - kWriteXmlError, /*< Failed to write XML in 
internal process */ -}; - -class LTP { -public: - static const int kActiveSegmentor = 1<<1; - static const int kActivePostagger = 1<<2; - static const int kActiveNER = 1<<3; - static const int kActiveParser = 1<<4; - static const int kActiveSRL = 1<<5; - -public: - LTP(const std::string& last_stage, - const std::string& segmentor_model_file, - const std::string& segmentor_lexicon_file, - const std::string& postagger_model_file, - const std::string& postagger_lexicon_file, - const std::string& ner_model_file, - const std::string& parser_model_file, - const std::string& srl_model_dir); - - ~LTP(); //! The deallocator - bool loaded() const; //! return true on the resource successful loaded, otherwise false - - // discard - // int CreateDOMFromTxt(const char * cszTxtFileName, XML4NLP& m_xml4nlp); - - // discard - // int CreateDOMFromXml(const char * cszXmlFileName, XML4NLP& m_xml4nlp); - - // save dom tree - // int SaveDOM(const char *cszSaveFileName, XML4NLP& m_xml4nlp); - - /* - * do word segmentation. 
- * - * @param[in/out] xml the xml storing ltp result - * @return int 0 on success, otherwise -1 - */ - int wordseg(XML4NLP & xml); - - /* - * do postagging - * - * @param[in/out] xml the xml storing ltp result - * @return int 0 on success, otherwise -1 - */ - int postag(XML4NLP & xml); - - /* - * do name entities recognization - * - * @param[in/out] xml the xml storing ltp result - * @return int 0 on success, otherwise -1 - */ - int ner(XML4NLP & xml); - - /* - * do dependency parsing - * - * @param[in/out] xml the xml storing ltp result - * @return int 0 on success, otherwise -1 - */ - int parser(XML4NLP & xml); - - /* - * do semantic role labeling - * - * @param[in/out] xml the xml storing ltp result - * @return int 0 on success, otherwise -1 - */ - int srl(XML4NLP & xml); - - int splitSentence_dummy(XML4NLP & xml); -private: - /* - * parse the config file, and load resource according the config - * - * @param[in] confFileName the config file - * @return int 0 on success, otherwise -1 - */ - bool load(const std::string& last_stage, - const std::string& segmentor_model_file, - const std::string& segmentor_lexicon_file, - const std::string& postagger_model_file, - const std::string& postagger_lexicon_file, - const std::string& ner_model_file, - const std::string& parser_model_file, - const std::string& srl_model_dir); - -private: - LTPResource _resource; /*< the ltp resources */ - bool _loaded; /*< use to sepcify if the resource is loaded */ -}; - -#endif // end for __LTP_H__ +#ifndef __LTP_H__ +#define __LTP_H__ + +#include "LTPResource.h" +#include "config.h" +#include "xml4nlp/Xml4nlp.h" +#include +#include +#include +#include +#include +#include +#include + +#define MAX_SENTENCE_LEN 1024 +#define MAX_WORDS_NUM 256 + +#define LTP_SERVICE_NAME_SEGMENT "ws" +#define LTP_SERVICE_NAME_POSTAG "pos" +#define LTP_SERVICE_NAME_NER "ner" +#define LTP_SERVICE_NAME_DEPPARSE "dp" +#define LTP_SERVICE_NAME_SRL "srl" +#define LTP_SERVICE_NAME_ALL "all" + +enum ErrorCodes { + 
kEmptyStringError = 1, /*< The input sentence is empty */ + kSplitSentenceError, /*< Failed to perform split sentence */ + kWordsegError, /*< Failed to perform wordseg */ + kPostagError, /*< Failed to perform postag */ + kParserError, /*< Failed to perform parsing */ + kNERError, /*< Failed to perform NER */ + kSRLError, /*< Failed to perform srl */ + kEncodingError, /*< Sentence encoding not in UTF-8 */ + kXmlParseError, /*< Input xml is not well formatted */ + kSentenceTooLongError, /*< More than 300 characters or 70 words */ + kReadXmlError, /*< Failed to read XML in internal process */ + kWriteXmlError, /*< Failed to write XML in internal process */ +}; + +class LTP { +public: + static const int kActiveSegmentor = 1<<1; + static const int kActivePostagger = 1<<2; + static const int kActiveNER = 1<<3; + static const int kActiveParser = 1<<4; + static const int kActiveSRL = 1<<5; + +public: + LTP(const std::string& last_stage, + const std::string& segmentor_model_file, + const std::string& segmentor_lexicon_file, + const std::string& postagger_model_file, + const std::string& postagger_lexicon_file, + const std::string& ner_model_file, + const std::string& parser_model_file, + const std::string& srl_model_dir); + + ~LTP(); //! The deallocator + bool loaded() const; //! return true on the resource successful loaded, otherwise false + + // discard + // int CreateDOMFromTxt(const char * cszTxtFileName, XML4NLP& m_xml4nlp); + + // discard + // int CreateDOMFromXml(const char * cszXmlFileName, XML4NLP& m_xml4nlp); + + // save dom tree + // int SaveDOM(const char *cszSaveFileName, XML4NLP& m_xml4nlp); + + /* + * do word segmentation. 
+ * + * @param[in/out] xml the xml storing ltp result + * @return int 0 on success, otherwise -1 + */ + int wordseg(XML4NLP & xml); + + /* + * do postagging + * + * @param[in/out] xml the xml storing ltp result + * @return int 0 on success, otherwise -1 + */ + int postag(XML4NLP & xml); + + /* + * do name entities recognization + * + * @param[in/out] xml the xml storing ltp result + * @return int 0 on success, otherwise -1 + */ + int ner(XML4NLP & xml); + + /* + * do dependency parsing + * + * @param[in/out] xml the xml storing ltp result + * @return int 0 on success, otherwise -1 + */ + int parser(XML4NLP & xml); + + /* + * do semantic role labeling + * + * @param[in/out] xml the xml storing ltp result + * @return int 0 on success, otherwise -1 + */ + int srl(XML4NLP & xml); + + int splitSentence_dummy(XML4NLP & xml); +private: + /* + * parse the config file, and load resource according the config + * + * @param[in] confFileName the config file + * @return int 0 on success, otherwise -1 + */ + bool load(const std::string& last_stage, + const std::string& segmentor_model_file, + const std::string& segmentor_lexicon_file, + const std::string& postagger_model_file, + const std::string& postagger_lexicon_file, + const std::string& ner_model_file, + const std::string& parser_model_file, + const std::string& srl_model_file); + +private: + LTPResource _resource; /*< the ltp resources */ + bool _loaded; /*< use to sepcify if the resource is loaded */ +}; + +#endif // end for __LTP_H__ diff --git a/src/server/CMakeLists.txt b/src/server/CMakeLists.txt index ed508e2d5..1815759fb 100644 --- a/src/server/CMakeLists.txt +++ b/src/server/CMakeLists.txt @@ -3,7 +3,7 @@ include_directories (./ ${THIRDPARTY_DIR}/boost/include/ ${THIRDPARTY_DIR}/maxent/ ${THIRDPARTY_DIR}/tinyxml/ - ${THIRDPARTY_DIR}/jsoncpp/include/) + ${THIRDPARTY_DIR}/jsoncpp/include) set (ltp_server_SRC ltp_server.cpp mongoose.c mongoose.h) link_directories ( ${LIBRARY_OUTPUT_PATH} ) @@ -21,5 +21,7 @@ 
target_link_libraries (ltp_server pthread boost_program_options_static_lib boost_regex_static_lib + boost_serialization_static_lib dl - jsoncpp) + dynet + jsoncpp_lib_static) diff --git a/src/server/ltp_server.cpp b/src/server/ltp_server.cpp index 3fa22bd73..2074a9fa4 100644 --- a/src/server/ltp_server.cpp +++ b/src/server/ltp_server.cpp @@ -11,7 +11,6 @@ #include "utils/strutils.hpp" #include "utils/logging.hpp" #include "utils/codecs.hpp" -#include "utils/xml4nlp_helper.h" #include "json/json.h" #define POST_LEN 1024 @@ -54,7 +53,7 @@ int main(int argc, char *argv[]) { "- " LTP_SERVICE_NAME_POSTAG ": Part of speech tagging\n" "- " LTP_SERVICE_NAME_NER ": Named entity recognization\n" "- " LTP_SERVICE_NAME_DEPPARSE ": Dependency parsing\n" - "- " LTP_SERVICE_NAME_SRL ": Semantic role labeling (equals to all)\n" + "- " LTP_SERVICE_NAME_SRL ": Semantic role labeling\n" "- all: The whole pipeline [default]") ("segmentor-model", value(), "The path to the segment model [default=ltp_data/cws.model].") @@ -68,8 +67,8 @@ int main(int argc, char *argv[]) { "The path to the NER model [default=ltp_data/ner.model].") ("parser-model", value(), "The path to the parser model [default=ltp_data/parser.model].") - ("srl-data", value(), - "The path to the SRL model directory [default=ltp_data/srl_data/].") + ("srl-model", value(), + "The path to the srl model [default=ltp_data/pisrl.model].") ("log-level", value(), "The log level:\n" "- 0: TRACE level\n" "- 1: DEBUG level\n" @@ -115,9 +114,9 @@ int main(int argc, char *argv[]) { && last_stage != LTP_SERVICE_NAME_NER && last_stage != LTP_SERVICE_NAME_DEPPARSE && last_stage != LTP_SERVICE_NAME_SRL - && last_stage != LTP_SERVICE_NAME_ALL) { + && last_stage != "all") { std::cerr << "Unknown stage name:" << last_stage << ", reset to 'all'" << std::endl; - last_stage = LTP_SERVICE_NAME_ALL; + last_stage = "all"; } } @@ -150,10 +149,10 @@ int main(int argc, char *argv[]) { if (vm.count("parser-model")) { parser_model= 
vm["parser-model"].as(); } - - std::string srl_data= "ltp_data/srl/"; - if (vm.count("srl-data")) { - srl_data = vm["srl-data"].as(); + INFO_LOG("parser model after vm :\"%s\"", parser_model.c_str()); + std::string srl_model= "ltp_data/pisrl.model"; + if (vm.count("srl-model")) { + srl_model = vm["srl-model"].as(); } int log_level = LTP_LOG_INFO; @@ -167,7 +166,7 @@ int main(int argc, char *argv[]) { } engine = new LTP(last_stage, segmentor_model, segmentor_lexicon, postagger_model, - postagger_lexcion, ner_model, parser_model, srl_data); + postagger_lexcion, ner_model, parser_model, srl_model); if (!engine->loaded()) { ERROR_LOG("Failed to setup LTP engine."); @@ -255,6 +254,119 @@ static void ErrorResponse(struct mg_connection* conn, } } +static std::string xml2jsonstr(const XML4NLP & xml, std::string str_type) { + Json::Value root; + + int paragraphNum = xml.CountParagraphInDocument(); + + for (int pid = 0; pid < paragraphNum; ++ pid) { + Json::Value paragraph; + + int stnsNum = xml.CountSentenceInParagraph(pid); + for (int sid = 0; sid < stnsNum; ++sid) { + Json::Value sentence; + + std::vector vecWord; + std::vector vecPOS; + std::vector vecNETag; + std::vector> vecParse; + //std::vector> vecSemResult; + std::vector>> vecSemResult; + std::vector > > > > vecSRLResult; + + // seg + xml.GetWordsFromSentence(vecWord, pid, sid); + + // postag + if (str_type == LTP_SERVICE_NAME_POSTAG + || str_type == LTP_SERVICE_NAME_NER + || str_type == LTP_SERVICE_NAME_DEPPARSE + || str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + xml.GetPOSsFromSentence(vecPOS, pid, sid); + } + + // ner + if (str_type == LTP_SERVICE_NAME_NER + || str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + xml.GetNEsFromSentence(vecNETag, pid, sid); + } + + // dp + if (str_type == LTP_SERVICE_NAME_DEPPARSE + || str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + xml.GetParsesFromSentence(vecParse, pid, sid); + } + + // srl + if 
(str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + // get by word + } + + for (int wid = 0; wid < vecWord.size(); ++wid) { + Json::Value word; + word["id"] = wid; + word["cont"] = vecWord[wid]; + + // postag + if (str_type == LTP_SERVICE_NAME_POSTAG + || str_type == LTP_SERVICE_NAME_NER + || str_type == LTP_SERVICE_NAME_DEPPARSE + || str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + word["pos"] = vecPOS[wid]; + + } + + // ner + if (str_type == LTP_SERVICE_NAME_NER + || str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + word["ne"] = vecNETag[wid]; + } + + // dp + if (str_type == LTP_SERVICE_NAME_DEPPARSE + || str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + word["parent"] = vecParse[wid].first; + word["relate"] = vecParse[wid].second; + } + + // srl + if (str_type == LTP_SERVICE_NAME_SRL + || str_type == LTP_SERVICE_NAME_ALL) { + Json::Value args; + std::vector vecType; + std::vector> vecBegEnd; + xml.GetPredArgToWord(pid, sid, wid, vecType, vecBegEnd); + if (vecType.size() != 0) { + for (int arg_id = 0; arg_id < vecType.size(); ++arg_id) { + Json::Value arg; + arg["id"] = arg_id; + arg["type"] = vecType[arg_id]; + arg["beg"] = vecBegEnd[arg_id].first; + arg["end"] = vecBegEnd[arg_id].second; + args.append(arg); + } + } else { + args.resize(0); + } + word["arg"] = args; + } + + sentence.append(word); + } + + paragraph.append(sentence); + } // sentence + root.append(paragraph); + } // paragraph + return root.toStyledString(); +} static int Service(struct mg_connection *conn) { char *sentence; @@ -383,13 +495,13 @@ static int Service(struct mg_connection *conn) { ErrorResponse(conn, static_cast(ret)); return 0; } - } else if (str_type == LTP_SERVICE_NAME_SRL){ + } else if (str_type == LTP_SERVICE_NAME_SRL){ // srl int ret = engine->srl(xml4nlp); if (0 != ret) { ErrorResponse(conn, static_cast(ret)); return 0; } - } else { + } else { // all str_type = 
LTP_SERVICE_NAME_ALL; int ret = engine->srl(xml4nlp); if (0 != ret) { @@ -401,10 +513,10 @@ static int Service(struct mg_connection *conn) { TRACE_LOG("Analysis is done."); std::string strResult; - if (str_format == LTP_SERVICE_OUTPUT_FORMAT_JSON) { - strResult = ltp::utility::xml2jsonstr(xml4nlp, str_type); - } else { //xml + if (str_format == "xml") { xml4nlp.SaveDOM(strResult); + } else { //json + strResult = xml2jsonstr(xml4nlp, str_type); } diff --git a/src/srl/CMakeLists.txt b/src/srl/CMakeLists.txt index cb1e15c2c..84a6ca26d 100644 --- a/src/srl/CMakeLists.txt +++ b/src/srl/CMakeLists.txt @@ -1,64 +1,64 @@ -include_directories ( - . - ${util_DIR} - ${SOURCE_DIR}/utils - ${THIRDPARTY_DIR}/boost/include - ${THIRDPARTY_DIR}/maxent) +include_directories (. + ${THIRDPARTY_DIR}/boost/include + ${THIRDPARTY_DIR}/eigen + ${THIRDPARTY_DIR}/dynet + ) + +link_directories(${THIRDPARTY_DIR}/dynet/build/dynet) # -L + +set(LIBS ${LIBS} boost_program_options_static_lib boost_serialization_static_lib) + +add_subdirectory(include) +include_directories(include) + +set(LIBS ${LIBS} base_static_lib) set (srl_VERSION "0.0.1") + +include_directories(common) +add_subdirectory(common) + +set(LIBS ${LIBS} common_static_lib) + +add_subdirectory(Pi) +#set(LIBS ${LIBS} srl_pi_static_lib) + +add_subdirectory(Srl) +set(LIBS ${LIBS} srl_srl_static_lib) + +add_subdirectory(tool) + set (srl_SRC - Configuration.cpp - Configuration.h - ConstVar.h - options.h - DataPreProcess.cpp - DataPreProcess.h - DataStruct.h - DepSRL.cpp - DepSRL.h - FeatureExtractor.cpp - FeatureExtractor.h - MyStruct.h - MyTree.cpp - MyTree.h - Sentence.cpp - Sentence.h - SRLBaseline.cpp - SRLBaselineExt.cpp - SRLBaselineExt.h - SRLBaseline.h - SRL_DLL.cpp - SRL_DLL.h - SRL_DLL_x.cpp - tree.hh) + DepSRL.cpp + DepSRL.h + SRL_DLL.cpp + SRL_DLL.h) + add_library (srl_static_lib STATIC ${srl_SRC}) +target_link_libraries (srl_static_lib dynet ${LIBS}) -set_target_properties (srl_static_lib PROPERTIES - OUTPUT_NAME srl) 
+set_target_properties (srl_static_lib PROPERTIES + OUTPUT_NAME srl) -add_library (srl_shared_lib SHARED ${srl_SRC}) +add_library(srl_shared_lib SHARED ${srl_SRC}) +target_link_libraries (srl_shared_lib dynet ${LIBS}) +set_target_properties (srl_shared_lib PROPERTIES + VERSION ${srl_VERSION} + OUTPUT_NAME srl) -set_target_properties (srl_shared_lib PROPERTIES - VERSION ${srl_VERSION} - OUTPUT_NAME srl) +#add_library (ppsrl_shared_lib SHARED ${srl_SRC}) +# +#set_target_properties (ppsrl_shared_lib PROPERTIES +# VERSION ${srl_VERSION} +# OUTPUT_NAME srl) -target_link_libraries (srl_static_lib maxent_static_lib) -target_link_libraries (srl_shared_lib maxent_shared_lib) +target_link_libraries (srl_static_lib dynet ${LIBS}) +#target_link_libraries (ppsrl_shared_lib maxent_shared_lib) configure_file ( - SRL_DLL.h - ${INCLUDE_OUTPUT_PATH}/ltp/SRL_DLL.h) + SRL_DLL.h + ${INCLUDE_OUTPUT_PATH}/ltp/srl_dll.h) link_directories (${LIBRARY_OUTPUT_PATH}) - -# Training suite is not supported in windows -if (NOT WIN32) -set (lgsrl_SRC lgsrl.cpp Corpus.cpp GetInstance.cpp) -add_executable (lgsrl ${lgsrl_SRC}) -target_link_libraries (lgsrl maxent srl_static_lib) -set_target_properties (lgsrl - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/) -endif() diff --git a/src/srl/Configuration.cpp b/src/srl/Configuration.cpp deleted file mode 100644 index 8eefbd0f5..000000000 --- a/src/srl/Configuration.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * File Name : Configuration.cpp - * Author : msmouse - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - * - */ - - -#include "Configuration.h" -#include -#include -#include - -using namespace std; - -void Configuration::load_xml(const string& filename) -{ - ifstream xml_file(filename.c_str()); - if (!xml_file) - { - throw runtime_error("Can't open the configuration file\n"); - } - vector lines; - lines.clear(); - string line; - while (getline(xml_file, line)) - { - trim(line); - lines.push_back(line); - } - - parse(lines); -} - 
-void Configuration::trim(string& line) -{ - size_t begin, end; - begin = line.find_first_not_of(" \t\n"); - end = line.find_last_not_of(" \t\n"); - line = line.substr(begin,end+1-begin); -} - -size_t Configuration::find( - const vector& lines, - const string& tag) const -{ - for (size_t i=0; i& lines) -{ - size_t language_begin = find(lines, ""); - size_t language_end = find(lines, ""); - - size_t pred_rg_begin = find(lines, ""); - size_t pred_rg_end = find(lines, ""); - size_t pred_cl_begin = find(lines, ""); - size_t pred_cl_end = find(lines, ""); - - size_t feat_begin = find(lines, ""); - size_t feat_end = find(lines, ""); - - size_t noun_POS_begin = find(lines, ""); - size_t noun_POS_end = find(lines, ""); - size_t verb_POS_begin = find(lines, ""); - size_t verb_POS_end = find(lines, ""); - - m_language = lines[language_begin+1]; - - vector vec; - vec.clear(); - for (size_t i = feat_begin+1; i < feat_end; i++) - { - vec.push_back(lines[i]); - } - m_argu_config.set_feature_names(vec); - - vec.clear(); - for (size_t i = pred_rg_begin+1; i < pred_rg_end; i++) - { - vec.push_back(lines[i]); - } - m_pred_recog_config.set_feature_names(vec); - - vec.clear(); - for (size_t i = pred_cl_begin+1; i < pred_cl_end; i++) - { - vec.push_back(lines[i]); - } - m_pred_class_config.set_feature_names(vec); - - m_noun_POS.clear(); - for (size_t i=noun_POS_begin+1; i -#include - -class PredClassConfig -{ - /* config for predicate classifier */ - public: - PredClassConfig() {}; - - void set_feature_names(const std::vector& _features) - { - m_feature_names = _features; - } - - const std::vector& get_feature_names() const - { - return m_feature_names; - } - - private: - std::vector m_feature_names; -}; - -class PredRecogConfig -{ - /* config for predicate recognizer */ - public: - PredRecogConfig() {}; - - void set_feature_names(const std::vector& _features) - { - m_feature_names = _features; - } - - const std::vector& get_feature_names() const - { - return m_feature_names; - } - - 
private: - std::vector m_feature_names; -}; - -class ArguConfig -{ - /* config for semantic role classifer */ - public: - ArguConfig(){}; - - void set_feature_names(const std::vector& _features) - { - m_feature_names = _features; - } - - const std::vector& get_feature_names() const - { - return m_feature_names; - } - - private: - std::vector m_feature_names; -}; - -class Configuration -{ - public: - Configuration(){}; - - explicit Configuration(const std::string filename) - { - load_xml(filename); - } - - void load_xml(const std::string& filename); - - std::string get_language() const - { - return m_language; - } - - PredClassConfig& get_pred_class_config() - { - return m_pred_class_config; - } - - PredRecogConfig& get_pred_recog_config() - { - return m_pred_recog_config; - } - - ArguConfig& get_argu_config() - { - return m_argu_config; - } - - const PredClassConfig& get_pred_class_config() const - { - return m_pred_class_config; - } - - const PredRecogConfig& get_pred_recog_config() const - { - return m_pred_recog_config; - } - - const ArguConfig& get_argu_config() const - { - return m_argu_config; - } - - bool is_verbPOS(const std::string& POS) const; - bool is_nounPOS(const std::string& POS) const; - - private: - /* parse the xml file (simple version) */ - void parse(const std::vector& lines); - - /* trim */ - void trim(std::string& line); - - size_t find(const std::vector& lines, const std::string& tag) const; - - private: - PredClassConfig m_pred_class_config; - PredRecogConfig m_pred_recog_config; - ArguConfig m_argu_config; - - std::string m_language; - - std::vector m_noun_POS; - std::vector m_verb_POS; -}; - -#endif diff --git a/src/srl/ConstVar.h b/src/srl/ConstVar.h deleted file mode 100644 index 4d4433947..000000000 --- a/src/srl/ConstVar.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * File Name : ConstVar.h - * Author : Frumes - * Create Time : 20061231 - * Project Name : NewSRLBaseLine - * Remark : define the constant variable used in the project, - * the 
variable is classified as char, char* and int. - */ - - -#ifndef _CONST_VAR_ -#define _CONST_VAR_ - -/*------------ const char type variable begin --------------*/ -//the B-I-E-S-O tag for name entity -const char C_NE_SINGLE = 'S'; -const char C_NE_BEGIN = 'B'; -const char C_NE_END = 'E'; -const char C_NE_IN = 'I'; -const char C_NE_OUT = 'O'; - -const char C_NE_SEP = '-'; // separate tag -const char C_END_CHAR = '\0'; // the end character of c type string -const char C_COMMENT_CHAR = '#'; // the comment character in configuration file -const char C_FEATTYPE_COMMENT = '$'; -const char C_UP = '>'; -const char C_DOWN = '<'; -const char C_ADD = '+'; -const char C_TAB = '\t'; -/*------------ const char type variable end --------------*/ - -/*-------- const char* const type variable begin --------*/ -const char* const S_ROOT_REL = "HED"; // root relation tag in dependency tree -const char* const S_NULL_REL = "NREL"; // relation of null node - -// family members relationship of tow tree node -const char* const S_FMS_PARENT = "FMSP"; -const char* const S_FMS_CHILD = "FMSC"; -const char* const S_FMS_SIBLING = "FMSS"; -const char* const S_FMS_ANCESTOR = "FMSAC"; -const char* const S_FMS_POSTERITY = "FMSPT"; -const char* const S_FMS_OTHER = "FMSO"; - -// position tag related to the predicate -const char* const S_PS_BEFORE = "PSB"; -const char* const S_PS_AFTER = "PSA"; -const char* const S_PS_PD = "PSP"; - -// the path feature related string -const char* const S_PATH_PD = "PD"; -const char* const S_PATH_UP =">"; -const char* const S_PATH_DOWN = "<"; - -// some null type tag -const char* const S_NULL_NE = "NNE"; -const char* const S_NULL_ARG = "NULL"; -const char* const S_NULL_WORD = "NWD"; -const char* const S_NULL_POS = "NPOS"; -const char* const S_NULL_STR = ""; -const char* const S_NULL_PD = "N-P"; - -const char* const S_VERB_POS = "v"; //the POS tag of verb -const char* const S_HYPHEN_TAG = "-"; //the hyphenation tag -const char* const S_STAR = "*"; -const char* const 
S_LEFT_BRACKET = "("; -const char* const S_RIGHT_BRACKET = ")"; - -// the null pattern features of predicate -const char* const S_NULL_POSPAT_PDCHR = "NPPPC"; -const char* const S_NULL_RELPAT_PDCHR = "NRPPC"; -const char* const S_NULL_POSPAT_PDSIBS = "NPPPS"; -const char* const S_NULL_RELPAT_PDSIBS = "NRPPS"; - -const char* const S_NULL_PD_CLASS = "NPDC"; //the predicate which can not find in dict -const char* const S_PD_ARG = "rel"; //the predicate arg label -/*------00- const char* const type variable end --------*/ - -/*------------ const int type variable begin ------------*/ -const int I_NULL_ID = -1; //the ID of null node -const int I_NULL_RIGHT = 10000; //the default null right ID -const int I_NULL_RCP = -1; //the tow node have no recent common parent - -//some const int number about name entity -const int I_NE_LENGTH = 4; -const int I_NE_FIRSTPS = 0; -const int I_NE_SEPPS = 1; -const int I_NE_BEGINPS = 2; -const int I_NE_SIZE = 2; - -const int I_PUN_PARENT_ID = -2; //the parent ID of punctuation character -const int I_HED_PARENT_ID = -1; // note: changed for PTBtoDep -const int I_NUMEXC = 1; //used for gold args file //changed for PTBtoDep -const int I_RADIX = 10; //the radix parameter of function: atoi - -const int I_FEATSEL_NUM = 64; //the number of features in the features-select configuration file -const int I_FEATCOMB_NUM = 32; //the number of features in the features-combine configuration file -const int I_WORD_LEN = 1024; //the length of a word -/*------------ const int type variable end ------------*/ - -/*------ some const variable for srl result combine ----*/ -const int I_SENT_IDX = 0; -const int I_PD_IDX = 1; -const int I_PS_BEG_IDX = 2; -const int I_PS_END_IDX = 3; -const int I_SENT_NUM = 1500; - -const double I_ARG_THRESHOLD_VAL = 0.5; - -const char C_PATTERN_SEP = '|'; - -const char* const S_QTY_ARG = "QTY"; -const char* const S_PSE_ARG = "PSE"; -const char* const S_PSR_ARG = "PSR"; -const char* const S_QTY_POS_PAT = "AD|CD|M|Q"; -const char* 
const S_ARG0_TYPE = "ARG0"; -/*------ some const variable for srl result combine ----*/ - - -#endif - diff --git a/src/srl/Corpus.cpp b/src/srl/Corpus.cpp deleted file mode 100644 index b6639168a..000000000 --- a/src/srl/Corpus.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * File Name : Corpus.cpp - * Author : msmouse - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - * - */ - - -#include "Corpus.h" -#include - -using namespace std; - -void Corpus::open_corpus(const string &filename) -{ - m_corpus.close(); - m_corpus.clear(); - - m_corpus.open(filename.c_str()); - if (!m_corpus) - { - throw runtime_error("Can't open corpus file"); - } -} - -bool Corpus::get_next_block(vector &lines) -{ - lines.clear(); - - /* if the file has already been read through, - * return false - */ - if (m_corpus.eof()) - return false; - - string line; - while (getline(m_corpus, line)) - { - if (string::npos == line.find_first_not_of("\t \n")) - { - if (lines.size() > 0) - { - return true; - } - } - else - { - lines.push_back(line); - } - } - - if (lines.size() > 0) - { - return true; - } - else // only blank line - { - return false; - } -} - diff --git a/src/srl/Corpus.h b/src/srl/Corpus.h deleted file mode 100644 index 128486f55..000000000 --- a/src/srl/Corpus.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * File Name : Corpus.h - * Author : msmouse - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - * - */ - - -#ifndef _CORPUS_H_ -#define _CORPUS_H_ - -#include -#include -#include - -class Corpus -{ -public: - Corpus() {} - - /* new Corpus corresponding to file "filename" */ - explicit Corpus(const std::string &filename) {open_corpus(filename);} - - ~Corpus() {} - - /* open a corpus file for input */ - void open_corpus(const std::string &filename); - - /* get the next block, blocks are separated with a blank line */ - bool get_next_block(std::vector &lines); - -private: - std::ifstream m_corpus; -}; - -#endif - diff --git a/src/srl/DataPreProcess.cpp b/src/srl/DataPreProcess.cpp 
deleted file mode 100644 index 47ebb1d80..000000000 --- a/src/srl/DataPreProcess.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * File Name : DataPreProcess.cpp - * Author : Frumes - * Create Time : 20061231 - * Project Name : NewSRLBaseLine - * Remark : get data from IR-LTP platform - * - */ - - -#include "DataPreProcess.h" - -DataPreProcess::DataPreProcess(const LTPData* ltpData) -{ - BuildStruct(ltpData); -} - -DataPreProcess::~DataPreProcess() -{ - DestroyStruct(); -} - -void DataPreProcess::BuildStruct(const LTPData* ltpData) -{ - m_ltpData = ltpData; - m_myTree = new MyTree(m_ltpData); - m_intItemNum = m_myTree->m_depTree.nodeNum; - - MapNEToCons(); //note: changed for PTBtoDep -} - -void DataPreProcess::DestroyStruct() -{ - delete m_myTree; - m_vecNE.clear(); -} - -void DataPreProcess::MapNEToCons() -{ - string strSingleNE; - string strExternNE; - DepNode dnNode; - - int index; - index = 0; - while (index < m_myTree->m_depTree.nodeNum) - { - m_myTree->GetNodeValue(dnNode, index); - - strSingleNE = SingleNE(dnNode.constituent.first, dnNode.constituent.second); - strExternNE = ExternNE(dnNode.constituent.first, dnNode.constituent.second); - if(strSingleNE.compare(S_NULL_NE)) - { - m_vecNE.push_back(strSingleNE); - } - else if(strExternNE.compare(S_NULL_NE)) - { - m_vecNE.push_back(strExternNE); - } - else - { - m_vecNE.push_back(S_NULL_NE); - } - - index++; - } - -} - -string DataPreProcess::SingleNE(int intBeg, int intEnd) const -{ - string strNETMP; - string strNE = S_NULL_STR; - string strNullNE = S_NULL_NE; - - strNE.resize(I_NE_SIZE); - if(intBeg == intEnd) - { - strNETMP = m_ltpData->vecNe.at(intBeg); - //match with "S-Nx" - if((strNETMP.length() == I_NE_LENGTH) && - (strNETMP[I_NE_FIRSTPS] == C_NE_SINGLE) && - (strNETMP[I_NE_SEPPS] == C_NE_SEP)) - { - strNE = strNETMP.substr(I_NE_BEGINPS, I_NE_SIZE); - return strNE; - } - else - { - return strNullNE; - } - } - else - { - return strNullNE; - } -} - -string DataPreProcess::ExternNE(int intBeg, int intEnd) 
const -{ - string strNETMPB; - string strNETMPE; - string strNETMP; - string strNE; - string strNullNE = S_NULL_NE; - - // being match "B-Nx" and end match "E-Nx", and other match "I-Nx" - strNETMPB = m_ltpData->vecNe.at(intBeg); - strNETMPE = m_ltpData->vecNe.at(intEnd); - strNE.resize(I_NE_SIZE); - - if((strNETMPB.length() == I_NE_LENGTH) && //length = 4 - (strNETMPE.length() == I_NE_LENGTH) && - (strNETMPB[I_NE_FIRSTPS] == C_NE_BEGIN) && //first char: B - (strNETMPE[I_NE_FIRSTPS] == C_NE_END) && //first char: E - (!strNETMPB.substr(I_NE_BEGINPS, I_NE_SIZE).compare(strNETMPE.substr(I_NE_BEGINPS, I_NE_SIZE))) //the Nx is the same - ) - { - //check the innr item - int i; - for(i = intBeg + 1 ; i < intEnd; i++) - { - strNETMP = m_ltpData->vecNe.at(i); - if(strNETMP[I_NE_FIRSTPS] != C_NE_IN) - { - return strNullNE; - } - } - - //asign ne type - strNE = strNETMPB.substr(I_NE_BEGINPS, I_NE_SIZE); - return strNE; - } - else - { - return strNullNE; - } -} - diff --git a/src/srl/DataPreProcess.h b/src/srl/DataPreProcess.h deleted file mode 100644 index 9d9ded685..000000000 --- a/src/srl/DataPreProcess.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * File Name : DataPreProcess.h - * Author : Frumes - * Create Time : 20061231 - * Project Name : NewSRLBaseLine - * Remark : get data from IR-LTP platform - * - */ - - -#ifndef __LTP_PROPRECESS__ -#define __LTP_PROPRECESS__ - -#include "MyTree.h" - -class DataPreProcess -{ - public: - DataPreProcess(const LTPData* ltpData); - ~DataPreProcess(); - - private: - void BuildStruct(const LTPData* ltpData); - void DestroyStruct(); - void MapNEToCons(); - - private: - string SingleNE(int intBeg, int intEnd) const; - string ExternNE(int intBeg, int intEnd) const; - - public: - - const LTPData* m_ltpData; - vector m_vecNE; - MyTree* m_myTree; - int m_intItemNum; //the Chinese word numbers after segmentation -}; - -#endif - diff --git a/src/srl/DataStruct.h b/src/srl/DataStruct.h deleted file mode 100644 index bb231fff1..000000000 --- 
a/src/srl/DataStruct.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * File Name : DataStruct.h - * Author : hjliu - * Time : 200644 - * Project : SRLBaseline - * Comment : describe the data structure used in srl baseline - * - * Copy Right: HIT-SCIR (c) 2006-2013, all rights reserved. - */ - -#ifndef _DATA_STRUCT_ -#define _DATA_STRUCT_ -#pragma warning(disable: 4284) - -#define STL_USING_ALL -#include -#include - -/* - * following defines some constant string - */ - -// relation -static const char *SBV = "SBV"; -static const char *VOB = "VOB"; -static const char *QUN = "QUN"; -static const char *ADV = "ADV"; - -static const int SBVID = 1; -static const int VOBID = 2; -static const int QUNID = 3; -static const int ADVID = 4; - -// pos -static const char *V = "v"; -static const char *NT = "nt"; -static const char *ND = "nd"; -static const char *NL = "nl"; -static const char *NS = "ns"; -static const char *P = "p"; -static const char *Q = "q"; - -// argument type -static const char *A0 = "Arg0"; -static const char *A1 = "Arg1"; -static const char *A0sQ = "Arg0-QTY"; -static const char *A1sQ = "Arg1-QTY"; -static const char *AMsTMP = "ArgM-TMP"; -static const char *AMsLOC = "ArgM-LOC"; -static const char *AMsDIR = "ArgM-DIR"; - -struct DepNode -{ - int parent; - deque dequeChildren; - string relation; - int id; - pair constituent; //the begin and end of the arg candidate -}; - -struct DepTree -{ - vector vecDepTree; - int nodeNum; -}; - -struct ArgInfo -{ - int id; - string type; - pair constituent; -}; - - -#endif - diff --git a/src/srl/DepSRL.cpp b/src/srl/DepSRL.cpp index a3d1a66d6..3496b7e5c 100644 --- a/src/srl/DepSRL.cpp +++ b/src/srl/DepSRL.cpp @@ -10,890 +10,182 @@ #include "DepSRL.h" -#include "FeatureExtractor.h" -#include "Configuration.h" -#include "boost/bind.hpp" -#include "boost/algorithm/string.hpp" +#include "extractor/ExtractorFileToWordEmb.h" +#include "vector" +#include "dynet/dynet.h" +#include "boost/archive/binary_iarchive.hpp" +#include 
"boost/archive/binary_oarchive.hpp" -// Load necessary resources into memory -int DepSRL::LoadResource(const string &ConfigDir) -{ - m_configXml = ConfigDir + "/Chinese.xml"; - m_selectFeats = ConfigDir + "/srl.cfg"; - // load srl and prg model - m_srlModel = new maxent::ME_Model; - bool tag = m_srlModel->load(ConfigDir + "/srl.model"); - if(!tag) { - return 0; - } - - m_prgModel = new maxent::ME_Model; - tag = m_prgModel->load(ConfigDir + "/prg.model"); - if(!tag) { - return 0; - } +using namespace std; +// Load necessary resources into memory +int DepSRL::LoadResource(const string &modelFile) +{ + dynet::DynetParams params; + params.mem_descriptor = "2000"; + dynet::initialize(params); + + ifstream in(modelFile); + if (!in) { return -1;} + boost::archive::binary_iarchive ia(in); + ia >> piConfig; + ia >> srlConfig; + ia >> embedding; + pi_model = new PiModel(piConfig); + pi_model->loadDict(ia); + pi_model->init(); + pi_model->loadModel(ia); + pi_model->initEmbedding(embedding); + srl_model = new SrlSrlModel(srlConfig); + srl_model->loadDict(ia); + srl_model->init(); + srl_model->loadModel(ia); + srl_model->initEmbedding(embedding); m_resourceLoaded = true; - - return true; + return 0; } // Release all resources int DepSRL::ReleaseResource() { - delete m_srlModel; - delete m_prgModel; - + delete srl_model; + delete pi_model; + embedding.clear(); m_resourceLoaded = false; - - return 1; -} -string DepSRL::GetConfigXml() -{ - return m_configXml; -} -string DepSRL::GetSelectFeats() -{ - return m_selectFeats; -} -int DepSRL::GetSRLResult( - const vector &words, - const vector &POSs, - const vector &NEs, - const vector< pair > &parse, - vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult - ) -{ - LTPData ltpData; - ltpData.vecWord = words; - ltpData.vecPos = POSs; - ltpData.vecNe = NEs; - - GetParAndRel(parse, ltpData.vecParent, ltpData.vecRelation); - - // construct a DataPreProcess instance - DataPreProcess* dataPreProc = new 
DataPreProcess(<pData); - - SRLBaselineExt * m_srlBaseline=new SRLBaselineExt(GetConfigXml(),GetSelectFeats()); - // extract features ! - m_srlBaseline->setDataPreProc(dataPreProc); - - // GetPredicateFromSentence(POSs,predicates); - vector predicates; - GetPredicateFromSentence(predicates,m_srlBaseline); - - // return GetSRLResult(words, POSs, NEs, parse, predicates, vecSRLResult); - return GetSRLResult(ltpData, predicates, vecSRLResult,m_srlBaseline); + return 0; } -// produce DepSRL result for a sentence -/* int DepSRL::GetSRLResult( const vector &words, const vector &POSs, - const vector &NEs, const vector< pair > &parse, - const vector &predicates, vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult ) { - LTPData ltpData; - ltpData.vecWord = words; - ltpData.vecPos = POSs; - ltpData.vecNe = NEs; - - // transform LTP parse result to parent-relation format - GetParAndRel(parse, ltpData.vecParent, ltpData.vecRelation); - - return GetSRLResult(ltpData, predicates, vecSRLResult); -} -*/ - -// produce DepSRL result for a sentence -int DepSRL::GetSRLResult( - const LTPData <pData, - const vector &predicates, - vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult, - SRLBaselineExt * m_srlBaseline) { vecSRLResult.clear(); - - if ( !m_resourceLoaded ) { - cerr<<"Resources not loaded."< adists = pi_model->label(hg, sentence); + pi_model->ExtractResults(hg, adists, sentence); + } + if ( !sentence.getPredicateList().size() ) { // skip all processing if no predicate return 1; } - - VecFeatForSent vecAllFeatures; //the features interface for SRLBaseline - VecPosForSent vecAllPos; //the constituent position vector - vector< vector< pair > > vecAllPairMaxArgs; - vector< vector< pair > > vecAllPairNextArgs; - - // extract features - if (!ExtractSrlFeatures(ltpData, predicates,vecAllFeatures,vecAllPos,m_srlBaseline)) - return 0; - - // predict - if (!Predict(vecAllFeatures,vecAllPairMaxArgs,vecAllPairNextArgs)) - return 0; - 
- // form the result - if (!FormResult( - ltpData.vecWord,ltpData.vecPos, predicates,vecAllPos, - vecAllPairMaxArgs,vecAllPairNextArgs, - vecSRLResult - ) - ) return 0; - - // rename arguments to short forms (ARGXYZ->AXYZ) - if (!RenameArguments(vecSRLResult)) return 0; - delete m_srlBaseline; - - return 1; -} - -int DepSRL::ExtractSrlFeatures( - const LTPData <pData, - const vector &VecAllPredicates, - VecFeatForSent &vecAllFeatures, - VecPosForSent &vecAllPos, - SRLBaselineExt* m_srlBaseline - ) -{ - vecAllFeatures.clear(); - vecAllPos.clear(); - - /* - // construct a DataPreProcess instance - DataPreProcess* dataPreProc = new DataPreProcess(<pData); - - // extract features ! - m_srlBaseline->setDataPreProc(dataPreProc); - */ - - m_srlBaseline->SetPredicate(VecAllPredicates); - m_srlBaseline->ExtractSrlFeatures(vecAllFeatures, vecAllPos); - - return 1; -} - -int DepSRL::Predict( - VecFeatForSent &vecAllFeatures, - vector< vector< pair > > &vecAllPairMaxArgs, - vector< vector< pair > > &vecAllPairNextArgs - ) -{ - vector< pair > vecPredPairMaxArgs; - vector< pair > vecPredPairNextArgs; - - for(VecFeatForSent::iterator predicate_iter = vecAllFeatures.begin(); - predicate_iter != vecAllFeatures.end(); - ++predicate_iter - ){// for each predicate - vecPredPairMaxArgs.clear(); - vecPredPairNextArgs.clear(); - - for(VecFeatForVerb::iterator position_iter = (*predicate_iter).begin(); - position_iter != (*predicate_iter).end(); - ++position_iter - ) {// for each position - vector > outcome; - - maxent::ME_Sample sample(*position_iter); - m_srlModel->predict(sample, outcome); - // m_srlModel->eval_all((*position_iter),outcome); - - vecPredPairMaxArgs.push_back(outcome[0]); - vecPredPairNextArgs.push_back(outcome[1]); - } - - vecAllPairMaxArgs.push_back(vecPredPairMaxArgs); - vecAllPairNextArgs.push_back(vecPredPairNextArgs); + // srl prediction + { + ComputationGraph hg; + vector adists = srl_model->label(hg, sentence); + srl_model->ExtractResults(hg, adists, sentence); } 
- return 1; + if (!FormResult(words, POSs, sentence.getPredicateList(), sentence, vecSRLResult)) + return -1; + return 0; } int DepSRL::FormResult( const vector &words, const vector &POSs, - const vector &VecAllPredicates, - VecPosForSent &vecAllPos, - vector< vector< pair > > &vecAllPairMaxArgs, - vector< vector< pair > > &vecAllPairNextArgs, + const vector &VecAllPredicates, + SrlPiSample& sentence, vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult ) { vecSRLResult.clear(); - vector< pair< string, pair< int, int > > > vecResultForOnePredicate; - + vector< pair > childArea; + GetChildArea(sentence, childArea); for (size_t idx=0; idx 1 ) { - vecResultForOnePredicate.pop_back(); // pop the "V" arg - vecSRLResult.push_back(make_pair(predicate_position,vecResultForOnePredicate)); - } - //vecResultForOnePredicate.pop_back(); // pop the "V" arg - //vecSRLResult.push_back(make_pair(predicate_position,vecResultForOnePredicate)); + vector< pair< string, pair< int, int > > > vecResultForOnePredicate; + vector args; + for (int w = 0; w < sentence.size(); w++) args.push_back(sentence.getWord(w).getArgs()[idx]); + ProcessOnePredicate(words, POSs, predicate_position, args, childArea, vecResultForOnePredicate); + if (vecResultForOnePredicate.size()) + vecSRLResult.push_back(make_pair(predicate_position, vecResultForOnePredicate)); } - return 1; } -// result forming form one predicate, based on hjliu's original function +void DepSRL::GetChildArea(SrlPiSample& sentence, vector< pair >& childArea) { + childArea.resize(sentence.size()); + for (int w = 0; w < sentence.size(); ++w) { + childArea[w].first = childArea[w].second = w; + } + for (int w = 0; w < sentence.size(); ++w) { + for (int p = sentence.getWord(w).getParent(); p != -1; p = sentence.getWord(p).getParent()) { + if (w < childArea[p].first) childArea[p].first = w; + if (w > childArea[p].second) childArea[p].second = w; + } + } +} + void DepSRL::ProcessOnePredicate( const vector& vecWords, 
const vector& vecPos, int intPredicates, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, + const vector& args, + const vector< pair >& childArea, vector< pair< string, pair< int, int > > > &vecResultForOnePredicate ) { - vector< pair > vecPairPSBuf; - vector< pair > vecPairMaxArgBuf; - vector< pair > vecPairNextArgBuf; - - //step1. remove the null label - vector< pair > vecPairNNLMax; - vector< pair > vecPairNNLNext; - vector< pair > vecPairNNLPS; - RemoveNullLabel(vecPairMaxArgs, vecPairNextArgs, vecPairPS, vecPairNNLMax, vecPairNNLNext, vecPairNNLPS); - - // step 2. insert the args - vector vecItem; - for (int index = 0; index < vecPairNNLPS.size(); index++) - { - InsertOneArg( vecPairNNLPS.at(index), vecPairNNLMax.at(index), vecPairNNLNext.at(index), vecPairPSBuf, vecPairMaxArgBuf, vecPairNextArgBuf ) ; - } + vecResultForOnePredicate.resize(0); - // step 3. insert predicate node - if ( IsInsertPredicate(intPredicates, vecPairMaxArgBuf, vecPairPSBuf) ) - { - pair prPdPS; - pair prPdArg; - prPdPS.first = intPredicates; - prPdPS.second = intPredicates; - prPdArg.first = S_PD_ARG; - prPdArg.second = 1; + //step1. insert label other than nil + for (int j = 0; j < args.size(); ++j) { + if (args[j] != NIL_LABEL) vecResultForOnePredicate.push_back(make_pair(args[j], childArea[j])); + } - vecPairPSBuf.push_back(prPdPS); - vecPairMaxArgBuf.push_back(prPdArg); - vecPairNextArgBuf.push_back(prPdArg); - } + //step2. process the collision + ProcessCollisions(intPredicates, vecResultForOnePredicate); - // step 4. post process - PostProcess(vecPos, vecPairPS, vecPairMaxArgs, vecPairNextArgs, vecPairPSBuf, vecPairMaxArgBuf, vecPairNextArgBuf); + //step3. 
process the same tags + // pass 当之前的arg概率小于0.5,而且该arg概率更大时,插入重复论元 - // put into output vector - for (int index = 0; index < vecPairPSBuf.size(); index++) - { - vecResultForOnePredicate.push_back(make_pair(vecPairMaxArgBuf[index].first, vecPairPSBuf[index])); - } + //step4. post process + QTYArgsProcess(vecPos, vecResultForOnePredicate); } -void DepSRL::RemoveNullLabel(const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - const vector< pair >& vecPairPS, - vector< pair >& vecPairNNLMax, - vector< pair >& vecPairNNLNext, - vector< pair >& vecPairNNLPS) const -{ - vecPairNNLMax.clear(); - vecPairNNLNext.clear(); - vecPairNNLPS.clear(); - for (int index = 0; index < vecPairMaxArgs.size(); index++) - { - if ( vecPairMaxArgs.at(index).first.compare(S_NULL_ARG) ) - { - vecPairNNLMax.push_back(vecPairMaxArgs.at(index)); - vecPairNNLNext.push_back(vecPairNextArgs.at(index)); - vecPairNNLPS.push_back(vecPairPS.at(index)); - } - } -} - -void DepSRL::InsertOneArg(const pair& pArgPS, - const pair& pMaxArg, - const pair& pNextArg, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairNextArgBuf) const -{ - // 2.1. 
process the collision - vector vctCol; - FindCollisionCand(vecPairPSBuf, pArgPS, vctCol); - if ( !IsInsertNColLabel(vctCol, pMaxArg, vecPairMaxArgBuf, vecPairNextArgBuf, vecPairPSBuf) ) - { - // insert current node - // vecPairMaxArgBuf.push_back(pMaxArg); - // vecPairNextArgBuf.push_back(pNextArg); - // vecPairPSBuf.push_back(pArgPS); - - // process next arg +void DepSRL::ProcessCollisions(int intPredicates, vector > > &results) { + for (int j = 0; j < results.size(); ++j) { + for (int k = 0; k < results.size(); ++k) { + if ((results[k].second.first <= intPredicates && intPredicates <= results[k].second.second) + || + (j != k && results[j].second.first <= results[k].second.first && results[k].second.second <= results[j].second.second)) { + // k including predicate or j including k + // remove k + results.erase(results.begin() + k); + ProcessCollisions(intPredicates, results); return; + } } - - // 2.2. process the same args - vector vctSame; - vector vctSameDel; - FindSameLabelCand(vecPairMaxArgBuf, pMaxArg, vctSame); - if ( !IsInsertSameLabel(vctSame, pMaxArg, vecPairMaxArgBuf, vecPairNextArgBuf, vecPairPSBuf, vctSameDel) ) - { - // insert current node - // vecPairMaxArgBuf.push_back(pMaxArg); - // vecPairNextArgBuf.push_back(pNextArg); - // vecPairPSBuf.push_back(pArgPS); - - // process next arg - return; - } - - // 2.3 insert current node - // remove collisions and same-args - // BOOST_FOREACH (int id, vctCol) { - for(int id = 0; id < vctCol.size(); id++) { - vecPairMaxArgBuf[id].second = -1; - vecPairNextArgBuf[id].second = -1; - vecPairPSBuf[id].second = -1; - } - // BOOST_FOREACH (int id, vctSameDel) { - for(int id = 0; id < vctSameDel.size(); id++) { - vecPairMaxArgBuf[id].second = -1; - vecPairNextArgBuf[id].second = -1; - vecPairPSBuf[id].second = -1; - } - vecPairMaxArgBuf.erase( - remove_if( - vecPairMaxArgBuf.begin(), - vecPairMaxArgBuf.end(), - boost::bind( - less(), - boost::bind( - &pair::second, - _1 - ), - 0 - ) - ), - vecPairMaxArgBuf.end() - ); - 
vecPairNextArgBuf.erase( - remove_if( - vecPairNextArgBuf.begin(), - vecPairNextArgBuf.end(), - boost::bind( - less(), - boost::bind( - &pair::second, - _1 - ), - 0 - ) - ), - vecPairNextArgBuf.end() - ); - vecPairPSBuf.erase( - remove_if( - vecPairPSBuf.begin(), - vecPairPSBuf.end(), - boost::bind( - less(), - boost::bind( - &pair::second, - _1 - ), - 0 - ) - ), - vecPairPSBuf.end() - ); - vecPairMaxArgBuf.push_back(pMaxArg); - vecPairNextArgBuf.push_back(pNextArg); - vecPairPSBuf.push_back(pArgPS); -} - -bool DepSRL::IsInsertPredicate(int intPredicate, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairPSBuf) const -{ - for(int index = 0; index < vecPairPSBuf.size(); index++) - { - if ( (vecPairPSBuf.at(index).first <= intPredicate) && - (vecPairPSBuf.at(index).second >= intPredicate) ) - { - vecPairPSBuf.at(index).first = intPredicate; - vecPairPSBuf.at(index).second = intPredicate; - vecPairMaxArgBuf.at(index).first = S_PD_ARG; - vecPairMaxArgBuf.at(index).second = 1; - - return 0; - } - } - - return 1; -} - -/* -void DepSRL::TransVector(const vector& vecInStr, - vector& vecOutStr) const -{ - vector::const_iterator itInStr; - itInStr = vecInStr.begin(); - while (itInStr != vecInStr.end()) - { - vecOutStr.push_back(*itInStr); - itInStr++; - } -} -*/ - -void DepSRL::GetParAndRel(const vector< pair >& vecParser, - vector& vecParent, - vector& vecRelation) const -{ - vector< pair >::const_iterator itParser; - pair pairParser; - - itParser = vecParser.begin(); - while(itParser != vecParser.end()) - { - pairParser = *itParser; - vecParent.push_back(pairParser.first); - vecRelation.push_back(pairParser.second); - ++ itParser; - } + } } -void DepSRL::GetPredicateFromSentence(const vector& vecPos, - vector& vecPredicate,SRLBaselineExt* m_srlBaseline) const +void DepSRL::QTYArgsProcess(const vector& vecPos, vector< pair< string, pair< int, int > > > &vecResultForOnePredicate) const { - int index; - vector::const_iterator itPos; - index = 0; - itPos = 
vecPos.begin(); - while (itPos != vecPos.end()) - { - if (m_srlBaseline->isVerbPOS(*itPos)) - { - vecPredicate.push_back(index); - } - - ++ index; - ++ itPos; - } -} - -void DepSRL::GetPredicateFromSentence(vector& vecPredicate,SRLBaselineExt * m_srlBaseline) const -{ - /* extract features for each word in sentence */ - vector< vector > vecFeatures; - m_srlBaseline->ExtractPrgFeatures(vecFeatures); - - /* predict */ - for (size_t i = 0; i < vecFeatures.size(); ++i) - { - maxent::ME_Sample sample(vecFeatures[i]); - vector< pair > prediction; - m_prgModel->predict(sample, prediction); - if (prediction[0].first == "Y") - vecPredicate.push_back(i); - } -} - -void DepSRL::PostProcess(const vector& vecPos, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const -{ - // step 1. process QTY args - QTYArgsProcess(vecPos, vecPairPSBuf, vecPairMaxArgsBuf, vecPairNextArgsBuf); - - // step 2. 
process PSR-PSE arg - PSERArgsProcess(S_ARG0_TYPE, vecPos, vecPairPS, vecPairMaxArgs, vecPairNextArgs, vecPairPSBuf, vecPairMaxArgsBuf, vecPairNextArgsBuf); -} - - -void DepSRL::FindCollisionCand(const vector< pair >& vecPairPSCands, - const pair& pairCurPSCand, - vector& vecPairColPSCands) const -{ - vecPairColPSCands.clear(); - for (int index = 0; index < vecPairPSCands.size(); index++) - { - if ( ((pairCurPSCand.first >= vecPairPSCands.at(index).first) && (pairCurPSCand.first <= vecPairPSCands.at(index).second)) || - ((pairCurPSCand.second >= vecPairPSCands.at(index).first) && (pairCurPSCand.second <= vecPairPSCands.at(index).second)) || - ((pairCurPSCand.first <= vecPairPSCands.at(index).first) && (pairCurPSCand.second >= vecPairPSCands.at(index).second)) ) - { - vecPairColPSCands.push_back(index); - } - } -} - -// format: (argType, argProp) -void DepSRL::FindSameLabelCand( - const vector< pair >& vecPairArgCands, - const pair& pairCurArgCand, - vector& vecPairSameArgCands) const -{ - vecPairSameArgCands.clear(); - for (int index = 0; index < vecPairArgCands.size(); index++) - { - if ( !pairCurArgCand.first.compare(vecPairArgCands.at(index).first) ) - { - vecPairSameArgCands.push_back(index); - } - } -} - -void DepSRL::QTYArgsProcess( - const vector& vecPos, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const -{ - vector< pair > vecPairPSTemp(vecPairPSBuf); - vector< pair > vecPairMaxArgsTemp(vecPairMaxArgsBuf); - vector< pair > vecPairNextArgsTemp(vecPairNextArgsBuf); - - vecPairPSBuf.clear(); - vecPairMaxArgsBuf.clear(); - vecPairNextArgsBuf.clear(); - // process rule : if (arg_type is "*-QTY") then the pos_pattern must (AD|CD|M)+ - // else must process: if next arg_type is "NULL" then drop this candidate - // else replace with the next arg_type - for (int index = 0; index < vecPairPSTemp.size(); index++) - { - if ( (vecPairMaxArgsTemp.at(index).first.find(S_QTY_ARG) != string::npos) && - 
!IsPosPattern(vecPairPSTemp.at(index).first, vecPairPSTemp.at(index).second, vecPos, S_QTY_POS_PAT) ) - { - if ( !vecPairNextArgsTemp.at(index).first.compare(S_NULL_ARG) ) - { - continue; - } - else - { - vecPairMaxArgsTemp.at(index) = vecPairNextArgsTemp.at(index); - } - } - - // add to candidate - vecPairPSBuf.push_back(vecPairPSTemp.at(index)); - vecPairMaxArgsBuf.push_back(vecPairMaxArgsTemp.at(index)); - vecPairNextArgsBuf.push_back(vecPairNextArgsTemp.at(index)); - } -} - -void DepSRL::PSERArgsProcess( - const string& strArgPrefix, - const vector& vecPos, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const -{ - vector vecPSRIndex; - vector vecPSEIndex; - pair pArgPS; - pair pMaxArg; - pair pNextArg; - - string psrArgType = strArgPrefix + S_HYPHEN_TAG + S_PSR_ARG; - string pseArgType = strArgPrefix + S_HYPHEN_TAG + S_PSE_ARG; - // step 1. find the PSR and PSE args index - for (int index = 0; index < vecPairPSBuf.size(); index++) - { - if (vecPairMaxArgsBuf.at(index).first.find(psrArgType) != string::npos) - { - vecPSRIndex.push_back(index); - } - - if (vecPairMaxArgsBuf.at(index).first.find(pseArgType) != string::npos) - { - vecPSEIndex.push_back(index); - } - } - - // step 2. 
check if matched - if ( vecPSRIndex.empty() && - !vecPSEIndex.empty() ) - { - // process the PSE args - if ( IsMaxPropGreaterThreshold(I_ARG_THRESHOLD_VAL, vecPSEIndex, vecPairMaxArgsBuf) && - FindArgFromDropCand(psrArgType, vecPairPS, vecPairMaxArgs, vecPairNextArgs, pArgPS, pMaxArg, pNextArg) ) - { - //find the matched arg-type - InsertOneArg( pArgPS, pMaxArg, pNextArg, vecPairPSBuf, vecPairMaxArgsBuf, vecPairNextArgsBuf ); - } - } - else if ( !vecPSRIndex.empty() && - vecPSEIndex.empty() ) - { - // process the PSR args - // process the PSE args - if ( IsMaxPropGreaterThreshold(I_ARG_THRESHOLD_VAL, vecPSRIndex, vecPairMaxArgsBuf) && - FindArgFromDropCand(pseArgType, vecPairPS, vecPairMaxArgs, vecPairNextArgs, pArgPS, pMaxArg, pNextArg) ) - { - //find the matched arg-type - InsertOneArg( pArgPS, pMaxArg, pNextArg, vecPairPSBuf, vecPairMaxArgsBuf, vecPairNextArgsBuf ); - } - } - -} - -bool DepSRL::FindArgFromDropCand( - const string& strArgPat, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - pair& pArgPS, - pair& pMaxArg, - pair& pNextArg) const -{ - int maxIndex = -1; - int flag = -1; - double maxProp = 0; - - for (int index = 0; index < vecPairPS.size(); index++) - { - if ( (vecPairMaxArgs.at(index).first.find(strArgPat) != string::npos) && - (vecPairMaxArgs.at(index).second > maxProp) ) - { - maxIndex = index; - maxProp = vecPairMaxArgs.at(index).second; - flag = 1; - } - else if ( (vecPairNextArgs.at(index).first.find(strArgPat) != string::npos) && - (vecPairNextArgs.at(index).second > maxProp) ) - { - maxIndex = index; - maxProp = vecPairNextArgs.at(index).second; - flag = 0; - } - } - - if ( (flag == -1) || (maxProp < 0.01) ) - { - return 0; - } - else if (flag == 1) - { - pMaxArg = vecPairMaxArgs.at(maxIndex); - } - else - { - pMaxArg = vecPairNextArgs.at(maxIndex); - } - - pArgPS = vecPairPS.at(maxIndex); - pNextArg = vecPairNextArgs.at(maxIndex); - return 1; -} - -void 
DepSRL::ReplaceArgFromNextProp( - const vector& vecIndex, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const -{ - int delIndex = 0; - // if next arg_type is "NULL" then drop this candidate - // else replace with the next arg_type - for (int index = 0; index < vecIndex.size(); index++) - { - if ( !vecPairNextArgsBuf.at(vecIndex.at(index)).first.compare(S_NULL_ARG) ) - { - vecPairPSBuf.erase( vecPairPSBuf.begin() + vecIndex.at(index) - delIndex ); - vecPairMaxArgsBuf.erase( vecPairMaxArgsBuf.begin() + vecIndex.at(index) - delIndex ); - vecPairNextArgsBuf.erase( vecPairNextArgsBuf.begin() + vecIndex.at(index) - delIndex ); - - delIndex++; - } - else - { - vecPairMaxArgsBuf.at(vecIndex.at(index)) = vecPairNextArgsBuf.at(vecIndex.at(index)); - } - } -} - -bool DepSRL::IsPosPattern( - int intBegin, - int intEnd, - const vector& vecPos, - const string& strPattern) const -{ - vector vecItem; - boost::algorithm::split(vecItem, strPattern, boost::is_any_of("|")); - - for (int index = intBegin; index < intEnd; index++) - { - if ( find(vecItem.begin(), vecItem.end(), vecPos.at(index)) == vecItem.end() ) - { - return 0; - } - } - - return 1; -} - -bool DepSRL::IsMaxPropGreaterThreshold( - double dThreSholdVal, - const vector& vecIndex, - const vector< pair >& vecPairMaxArgsBuf) const -{ - vector::const_iterator itIndex; - - itIndex = vecIndex.begin(); - while (itIndex != vecIndex.end()) - { - if (vecPairMaxArgsBuf.at(*itIndex).second >= dThreSholdVal) - { - return 1; - } - - ++ itIndex; + for (int j = 0; j < vecResultForOnePredicate.size(); ++j) { + auto& res = vecResultForOnePredicate[j]; + if (res.first == S_QTY_ARG) { + int k = res.second.first; + for (; k <= res.second.second; ++k) { + if (find(S_QTY_POS_PAT.begin(), S_QTY_POS_PAT.end(), vecPos[k]) != S_QTY_POS_PAT.end()) break; + } + if (k == res.second.second + 1) vecResultForOnePredicate.erase(vecResultForOnePredicate.begin() + j); + QTYArgsProcess(vecPos, 
vecResultForOnePredicate); + return; } - - return 0; + } } -bool DepSRL::IsInsertNColLabel( - const vector& vecCol, - const pair& pArgCand, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairNextArgBuf, - vector< pair >& vecPairPSBuf) const -{ - int id; - int isPSColInsert = 1; - if ( !vecCol.empty() ) - { - for (id = 0; id < vecCol.size(); id++) - { - // P(Ci) > P(A), no insert - if ( vecPairMaxArgBuf.at(vecCol.at(id)).second > pArgCand.second) - { - // isPSColInsert = 0; - // break; - return 0; - } - } - - /* - // delete the collision nodes - if (isPSColInsert) - { - for (id = 0; id < vecCol.size(); id++) - { - vecPairMaxArgBuf.erase(vecPairMaxArgBuf.begin() + vecCol.at(id) - id); - vecPairNextArgBuf.erase(vecPairNextArgBuf.begin() + vecCol.at(id) - id ); - vecPairPSBuf.erase(vecPairPSBuf.begin() + vecCol.at(id) - id); - } - - return 1; - } - - return 0; - */ - - } - - return 1; -} - -bool DepSRL::IsInsertSameLabel( - const vector& vecSame, - const pair& pArgCand, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairNextArgBuf, - vector< pair >& vecPairPSBuf, - vector& vecSameDel) const -{ - int id; - int isArgSameInsert = 1; - - // P(A) < 0.4 - if (pArgCand.second < 0.4) - { - isArgSameInsert = 0; - } - - if ( !vecSame.empty() ) - { - for (id = 0; id < vecSame.size(); id++) - { - // P(Ei) < P(A) < 0.5, insert - if ( (vecPairMaxArgBuf.at(vecSame.at(id)).second < 0.5) && - (vecPairMaxArgBuf.at(vecSame.at(id)).second < pArgCand.second) ) - { - vecSameDel.push_back(vecSame.at(id)); - isArgSameInsert = 1; - } - } - - //delete the small prob nodes - if (isArgSameInsert) - { - // for (id = 0; id < vecArgDel.size(); id++) - // { - // vecPairMaxArgBuf.erase(vecPairMaxArgBuf.begin() + vecArgDel.at(id) - id); - // vecPairNextArgBuf.erase(vecPairNextArgBuf.begin() + vecArgDel.at(id) - id); - // vecPairPSBuf.erase(vecPairPSBuf.begin() + vecArgDel.at(id) - id); - // } - - return 1; - } - - return 0; - } - else - { - return 1; - } - -} - -int 
DepSRL::RenameArguments( - vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult - ) -{ - for (vector< pair< int, vector< pair< string, pair< int, int > > > > >::iterator - predicate_iter = vecSRLResult.begin(); - predicate_iter != vecSRLResult.end(); - ++predicate_iter - ) - { - for(vector< pair< string, pair< int, int > > >::iterator - argument_iter = predicate_iter->second.begin(); - argument_iter != predicate_iter->second.end(); - ++argument_iter - ) - { - if (argument_iter->first.substr(0,3) == "ARG") { - argument_iter->first = "A" + argument_iter->first.substr(3); - } - } - } - - return 1; +void DepSRL::manageConfigPath(ModelConf &config, const string &dirPath) { + config.model = dirPath + '/' + config.model; } diff --git a/src/srl/DepSRL.h b/src/srl/DepSRL.h index 68213bd0d..4520803fe 100644 --- a/src/srl/DepSRL.h +++ b/src/srl/DepSRL.h @@ -11,12 +11,15 @@ #ifndef _DEP_SRL_ #define _DEP_SRL_ -#include "MyStruct.h" -#include "SRLBaselineExt.h" #include #include #include -#include + +#include "Pi/config/SrlPiConfig.h" +#include "Srl/config/SrlSrlConfig.h" +#include "Pi/model/SrlPiModel.h" +#include "Srl/model/SrlSrlModel.h" +#include "structure/WordEmbBuilder.h" class DepSRL { @@ -45,54 +48,12 @@ class DepSRL { int GetSRLResult( const vector &words, const vector &POSs, - const vector &NEs, - const vector< pair > &parse, - vector< pair< int, vector< pair > > > > &vecSRLResult - ); - - /* Produce DepSRL result for a sentence (manual predicates) - */ - /* - int GetSRLResult( - const vector &words, - const vector &POSs, - const vector &NEs, const vector< pair > &parse, - const vector &predicates, vector< pair< int, vector< pair > > > > &vecSRLResult ); - */ - /* Produce DepSRL result for a sentence (LTPData interface) - */ - int GetSRLResult( - const LTPData <pData, - const vector &predicates, - vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult, - SRLBaselineExt * m_srlBaseline - ); - - string GetConfigXml(); 
- string GetSelectFeats(); private: - /* 1.Extract SRL Features from input - */ - int ExtractSrlFeatures( - const LTPData <pData, - const vector &VecAllPredicates, - VecFeatForSent &vecAllFeatures, - VecPosForSent &vecAllPos, - SRLBaselineExt* m_srlBaseline - ); - - /* 2.Predict with the maxent library - */ - int Predict( - VecFeatForSent &vecAllFeatures, - vector< vector< pair > > &vecAllPairMaxArgs, - vector< vector< pair > > &vecAllPairNextArgs - ); /* 3.form the SRL result, based on predict result from maxent model */ @@ -100,155 +61,41 @@ class DepSRL { const vector &words, const vector &POSs, const vector &VecAllPredicates, - VecPosForSent &vecAllPos, - vector< vector< pair > > &vecAllPairMaxArgs, - vector< vector< pair > > &vecAllPairNextArgs, + SrlPiSample& sentence, vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult ); - /* 4. rename arguments to short forms (ARGXYZ->AXYZ) - */ - int RenameArguments( - vector< pair< int, vector< pair< string, pair< int, int > > > > > &vecSRLResult - ); - - /* get parents and relations in the dependent parse tree - */ - void GetParAndRel( - const vector< pair >& vecParser, - vector& vecParent, - vector& vecRelation) const; - - /* Version 1: find verb (predicate to be tagged) in a sentence - */ - void GetPredicateFromSentence( - const vector& vecPos, - vector& vecPredicate,SRLBaselineExt * m_srlBaseline) const; - - /* Version 2: find predicates according to a MaxEnt model - */ - void GetPredicateFromSentence(vector& vecPredicate,SRLBaselineExt * m_srlBaselie) const; - void ProcessOnePredicate( const vector& vecWords, const vector& vecPos, - int intPredicates, - const vector< pair > &vecPairPS, - const vector< pair > &vecPairMaxArgs, - const vector< pair > &vecPairNextArgs, + int intPredicates, + const vector& args, + const vector< pair >& childArea, vector< pair< string, pair< int, int > > > &vecResultForOnePredicate ); private: - /*-----------------for create srl result 
using--------------------------*/ - void FindCollisionCand( - const vector< pair >& vecPairPSCands, - const pair& pairCurPSCand, - vector& vecPairColPSCands) const; - - void FindSameLabelCand( - const vector< pair >& vecPairArgCands, - const pair& pairCurArgCand, - vector& vecPairSameArgCands) const; - - void InsertOneArg( - const pair& pArgPS, - const pair& pMaxArg, - const pair& pNextArg, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairNextArgBuf) const; - - void RemoveNullLabel( - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - const vector< pair >& vecPairPS, - vector< pair >& vecPairNNLMax, - vector< pair >& vecPairNNLNext, - vector< pair >& vecPairNNLPS) const; - - bool IsInsertNColLabel( - const vector& vecCol, - const pair& pArgCand, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairNextArgBuf, - vector< pair >& vecPairPSBuf) const; - - bool IsInsertSameLabel( - const vector& vecSame, - const pair& pArgCand, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairNextArgBuf, - vector< pair >& vecPairPSBuf, - vector &vctSameDel) const; - - bool IsInsertPredicate( - int intPredicate, - vector< pair >& vecPairMaxArgBuf, - vector< pair >& vecPairPSBuf) const; - /*-----------------for create srl result using--------------------------*/ - + /*-----for form result-----*/ + void GetChildArea(SrlPiSample &sentence, vector> &childArea); + void ProcessCollisions(int intPredicates, vector< pair< string, pair< int, int > > > &ResultForOnePredicate); + /*-----for form result-----*/ private: /*-------------------------for post process-----------------------------*/ - void PostProcess( - const vector& vecPos, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const; - - void QTYArgsProcess( - const vector& 
vecPos, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const; - void PSERArgsProcess( - const string& strArgPrefix, - const vector& vecPos, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const; - - bool FindArgFromDropCand( - const string& strArgPat, - const vector< pair >& vecPairPS, - const vector< pair >& vecPairMaxArgs, - const vector< pair >& vecPairNextArgs, - pair& pArgPS, - pair& pMaxArg, - pair& pNextArg) const; - - void ReplaceArgFromNextProp( - const vector& vecIndex, - vector< pair >& vecPairPSBuf, - vector< pair >& vecPairMaxArgsBuf, - vector< pair >& vecPairNextArgsBuf) const; - - bool IsPosPattern( - int intBegin, - int intEnd, - const vector& vecPos, - const string& strPattern) const; + void QTYArgsProcess(const vector& vecPos, vector< pair > >& results) const; - bool IsMaxPropGreaterThreshold( - double dThreSholdVal, - const vector& vecIndex, - const vector< pair >& vecPairMaxArgsBuf) const; /*-------------------------for post process-----------------------------*/ private: - bool m_resourceLoaded; + bool m_resourceLoaded; + SrlPiBaseConfig piConfig; + SrlSrlBaseConfig srlConfig; + SrlSrlModel * srl_model; + PiModel * pi_model; + unordered_map> embedding; + private: + void manageConfigPath(ModelConf &config, const string &dirPath); - string m_configXml; - string m_selectFeats; - maxent::ME_Model *m_srlModel; // for role labeling - maxent::ME_Model *m_prgModel; // for predicate recognition }; #endif diff --git a/src/srl/FeatureExtractor.cpp b/src/srl/FeatureExtractor.cpp deleted file mode 100644 index ec9b96354..000000000 --- a/src/srl/FeatureExtractor.cpp +++ /dev/null @@ -1,1492 +0,0 @@ -/* - * File Name : FeatureExtractor.cpp - * Author : msmouse - * Create Time : 2006-12-31 - * Project Name : NewSRLBaseLine - * - 
* Updated by : jiangfeng - * Update Time : 2013-08-21 - */ - - -#include "FeatureExtractor.h" - -#include - -using namespace std; - -// implementation for FeatureNameFunctionMap - -FeatureCollection::FeatureCollection() -{ - // make room for all the features - m_feature_infos.clear(); - m_feature_infos.resize(TOTAL_FEATURE); - - // add feature functions - // node features - // feature_number, type, name, prefix, getter_function - add_feature_(FEAT_DEPREL, FEAT_TYPE_NODE, "DepRelation", "DEPREL", &FeatureExtractor::fg_basic_info_); - add_feature_(FEAT_HEADWORD_POS, FEAT_TYPE_NODE, "HeadwordPOS", "HEAD_POS", &FeatureExtractor::fg_basic_info_); - add_feature_(FEAT_DEPWORD_POS, FEAT_TYPE_NODE, "DepwordPOS", "DEP_POS", &FeatureExtractor::fg_basic_info_); - add_feature_(FEAT_HEADWORD, FEAT_TYPE_NODE, "Headword", "HEADWORD", &FeatureExtractor::fg_basic_info_); - add_feature_(FEAT_DEPWORD, FEAT_TYPE_NODE, "Depword", "DEPWORD", &FeatureExtractor::fg_basic_info_); - add_feature_(FEAT_HEADWORD_LEMMA, FEAT_TYPE_NODE, "HeadwordLemma", "HEDLEMMA", &FeatureExtractor::fg_basic_info_); - add_feature_(FEAT_DEPWORD_LEMMA, FEAT_TYPE_NODE, "DepwordLemma", "DEPLEMMA", &FeatureExtractor::fg_basic_info_); - - add_feature_(FEAT_FIRST_WORD, FEAT_TYPE_NODE, "FirstWord", "FIRST_WD", &FeatureExtractor::fg_constituent_); - add_feature_(FEAT_LAST_WORD, FEAT_TYPE_NODE, "LastWord", "LAST_WD", &FeatureExtractor::fg_constituent_); - add_feature_(FEAT_FIRST_POS, FEAT_TYPE_NODE, "FirstPOS", "FIRST_POS", &FeatureExtractor::fg_constituent_); - add_feature_(FEAT_LAST_POS, FEAT_TYPE_NODE, "LastPOS", "LAST_POS", &FeatureExtractor::fg_constituent_); - add_feature_(FEAT_POS_PATTERN, FEAT_TYPE_NODE, "ConstituentPOSPattern", "POS_PAT", &FeatureExtractor::fg_constituent_); - add_feature_(FEAT_FIRST_LEMMA, FEAT_TYPE_NODE, "FirstLemma", "FIRST_LEM", &FeatureExtractor::fg_constituent_); - add_feature_(FEAT_LAST_LEMMA, FEAT_TYPE_NODE, "LastLemma", "LAST_LEM", &FeatureExtractor::fg_constituent_); - - 
add_feature_(FEAT_CHD_POS, FEAT_TYPE_NODE, "ChildrenPOS", "CH_POS", &FeatureExtractor::fg_children_pattern_); - add_feature_(FEAT_CHD_POS_NDUP, FEAT_TYPE_NODE, "ChildrenPOSNoDup", "CH_POS2", &FeatureExtractor::fg_children_pattern_); - add_feature_(FEAT_CHD_REL, FEAT_TYPE_NODE, "ChildrenREL", "CH_REL", &FeatureExtractor::fg_children_pattern_); - add_feature_(FEAT_CHD_REL_NDUP, FEAT_TYPE_NODE, "ChildrenRELNoDup", "CH_REL2", &FeatureExtractor::fg_children_pattern_); - - add_feature_(FEAT_SIB_POS, FEAT_TYPE_NODE, "SiblingsPOS", "SB_POS", &FeatureExtractor::fg_siblings_pattern_); - add_feature_(FEAT_SIB_POS_NDUP, FEAT_TYPE_NODE, "SiblingsPOSNoDup", "SB_POS2", &FeatureExtractor::fg_siblings_pattern_); - add_feature_(FEAT_SIB_REL, FEAT_TYPE_NODE, "SiblingsREL", "SB_REL", &FeatureExtractor::fg_siblings_pattern_); - add_feature_(FEAT_SIB_REL_NDUP, FEAT_TYPE_NODE, "SiblingsRELNoDup", "SB_REL2", &FeatureExtractor::fg_siblings_pattern_); - - // Predicate features - - add_feature_(FEAT_PRED_CHD_POS, FEAT_TYPE_PRED, "PredicateChildrenPOS", "P_CH_POS", &FeatureExtractor::fg_predicate_children_pattern_); - add_feature_(FEAT_PRED_CHD_POS_NDUP, FEAT_TYPE_PRED, "PredicateChildrenPOSNoDup", "P_CH_POS2", &FeatureExtractor::fg_predicate_children_pattern_); - add_feature_(FEAT_PRED_CHD_REL, FEAT_TYPE_PRED, "PredicateChildrenREL", "P_CH_REL", &FeatureExtractor::fg_predicate_children_pattern_); - add_feature_(FEAT_PRED_CHD_REL_NDUP, FEAT_TYPE_PRED, "PredicateChildrenRELNoDup", "P_CH_REL2", &FeatureExtractor::fg_predicate_children_pattern_); - - add_feature_(FEAT_PRED_SIB_POS, FEAT_TYPE_PRED, "PredicateSiblingsPOS", "P_SB_POS", &FeatureExtractor::fg_predicate_siblings_pattern_); - add_feature_(FEAT_PRED_SIB_POS_NDUP, FEAT_TYPE_PRED, "PredicateSiblingsPOSNoDup", "P_SB_POS2", &FeatureExtractor::fg_predicate_siblings_pattern_); - add_feature_(FEAT_PRED_SIB_REL, FEAT_TYPE_PRED, "PredicateSiblingsREL", "P_SB_REL", &FeatureExtractor::fg_predicate_siblings_pattern_); - 
add_feature_(FEAT_PRED_SIB_REL_NDUP, FEAT_TYPE_PRED, "PredicateSiblingsRELNoDup", "P_SB_REL2", &FeatureExtractor::fg_predicate_siblings_pattern_); - - add_feature_(FEAT_PRED_LEMMA, FEAT_TYPE_PRED, "PredicateLemma", "P_LEMMA", &FeatureExtractor::fg_predicate_basic_); - add_feature_(FEAT_PREDICATE, FEAT_TYPE_PRED, "Predicate", "PRED", &FeatureExtractor::fg_predicate_basic_); - add_feature_(FEAT_PRED_SENSE, FEAT_TYPE_PRED, "PredicateSense", "P_SENSE", &FeatureExtractor::fg_predicate_basic_); - - // node_vs_predicate features - - add_feature_(FEAT_PATH, FEAT_TYPE_NODE_VS_PRED, "Path", "PATH", &FeatureExtractor::fg_path_); - add_feature_(FEAT_UP_PATH, FEAT_TYPE_NODE_VS_PRED, "UpPath", "UP_PTH", &FeatureExtractor::fg_path_); - add_feature_(FEAT_REL_PATH, FEAT_TYPE_NODE_VS_PRED, "RelationPath", "REL_PATH", &FeatureExtractor::fg_path_); - add_feature_(FEAT_UP_REL_PATH, FEAT_TYPE_NODE_VS_PRED, "UpRelationPath", "UP_REL_PT", &FeatureExtractor::fg_path_); - - add_feature_(FEAT_PATH_LENGTH, FEAT_TYPE_NODE_VS_PRED, "PathLength", "PATH_LEN", &FeatureExtractor::fg_path_length_); - add_feature_(FEAT_UP_PATH_LEN, FEAT_TYPE_NODE_VS_PRED, "UpPathLength", "UP_PT_LEN", &FeatureExtractor::fg_path_length_); - add_feature_(FEAT_DOWN_PATH_LEN, FEAT_TYPE_NODE_VS_PRED, "DownPathLength", "DN_PT_LEN", &FeatureExtractor::fg_path_length_); - - add_feature_(FEAT_DESC_OF_PD, FEAT_TYPE_NODE_VS_PRED, "DescendantOfPredicate", "D_OF_PRD", &FeatureExtractor::fg_descendant_of_predicate_); - - add_feature_(FEAT_POSITION, FEAT_TYPE_NODE_VS_PRED, "Position", "POSITION", &FeatureExtractor::fg_position_); - - add_feature_(FEAT_PRED_FAMILYSHIP, FEAT_TYPE_NODE_VS_PRED, "PredicateFamilyship", "PRD_FAMIL", &FeatureExtractor::fg_predicate_familyship_); - - - // not addd verb_voice - // add_feature_(FEAT_VERB_VOICE, FEAT_TYPE_NODE, "VerbVoice", "VOICE", &FeatureExtractor::fg_verb_voice_); - // add_feature_(FEAT_PRED_VOICE, FEAT_TYPE_PRED, "PredicateVoice", "PREDVOICE",&FeatureExtractor::fg_predicate_voice_); - // 
add_feature_(FEAT_NODE_V_PRED, FEAT_TYPE_NODE_VS_PRED, "VerbBetweenPredicate", "N_V_PRED", &FeatureExtractor::fg_has_verb_between_predicate_); - // add_feature_(FEAT_HAS_SV, FEAT_TYPE_PRED, "HasSupportVerb", "HAS_SV", &FeatureExtractor::fg_has_support_verb_); // problem - - - // new features for predicate sense recognition - add_feature_(FEAT_BAG_OF_WORD, FEAT_TYPE_PRED, "PredicateBagOfWords", "P_BOW", &FeatureExtractor::fg_predicate_bag_of_words_); - add_feature_(FEAT_BAG_OF_WORD_O, FEAT_TYPE_PRED, "PredicateBagOfWordsOrdered", "P_BOWO", &FeatureExtractor::fg_predicate_bag_of_words_ordered_); - add_feature_(FEAT_BAG_OF_POS_O, FEAT_TYPE_PRED, "PredicateBagOfPOSOrdered", "P_BOPO", &FeatureExtractor::fg_predicate_bag_of_POSs_ordered_); - add_feature_(FEAT_BAG_OF_POS_N, FEAT_TYPE_PRED, "PredicateBagOfPOSNumbered", "P_BOPN", &FeatureExtractor::fg_predicate_bag_of_POSs_numbered_); - add_feature_(FEAT_WIND5_BIGRAM, FEAT_TYPE_PRED, "PredicateWindow5Bigram", "P_W5BGRM", &FeatureExtractor::fg_predicate_window5_bigram_); - add_feature_(FEAT_WIND5_BIGRAM_POS, FEAT_TYPE_PRED, "PredicateWindow5BigramPOS", "P_W5BGPOS", &FeatureExtractor::fg_predicate_window5_bigram_); - add_feature_(FEAT_BAG_OF_POS_WIND5, FEAT_TYPE_PRED, "PredicateBagOfPOSWindow5", "P_BOPW5", &FeatureExtractor::fg_predicate_bag_of_POSs_window5_); - add_feature_(FEAT_BAG_OF_POS_O_W5, FEAT_TYPE_PRED, "PredicateBagOfPOSorderedWindow5", "P_BOPOW5", &FeatureExtractor::fg_predicate_bag_of_POSs_ordered_); - add_feature_(FEAT_BAG_OF_POS_N_W5, FEAT_TYPE_PRED, "PredicateBagOfPOSNumberedWindow5", "P_POSNW5", &FeatureExtractor::fg_predicate_bag_of_POSs_numbered_); - add_feature_(FEAT_BAG_OF_WORD_IS_DES_O_PRED, FEAT_TYPE_PRED, "PredicateBagOfWordsAndIsDesOfPRED", "P_BOWDP", &FeatureExtractor::fg_predicate_bag_of_words_); - - // special features - // for English - add_feature_(FEAT_VERB_VOICE_EN, FEAT_TYPE_NODE, "VerbVoiceEn", "VOICE_EN", &FeatureExtractor::fg_verb_voice_en_); - add_feature_(FEAT_PRED_VOICE_EN, 
FEAT_TYPE_PRED, "PredicateVoiceEn", "PREDVOICE_EN", &FeatureExtractor::fg_predicate_voice_en_); - // for Chinese - // for Spanish - // for Catalan - // for German - // for Czech - // for Japanese - // for Spanish Catalan German Czech Japanese - add_feature_(FEAT_SUB_POS, FEAT_TYPE_NODE, "SubPOS", "SUBPOS", &FeatureExtractor::fg_feat_column); - add_feature_(FEAT_PFEAT_COLUMN, FEAT_TYPE_NODE, "PFEATColumn", "PFEATC", &FeatureExtractor::fg_pfeat_column_); - add_feature_(FEAT_PFEAT_EXC_NULL, FEAT_TYPE_NODE, "PFEATExceptNull", "PFEATNULL", &FeatureExtractor::fg_pfeat_column_); - add_feature_(FEAT_PFEAT, FEAT_TYPE_NODE, "PFEAT", "PFEAT", &FeatureExtractor::fg_pfeat_); -} - -void FeatureCollection::add_feature_( - FEAT_NUM feature_number, - FEAT_TYPE type, - const std::string& name, - const std::string& prefix, - const FeatureFunction& getter) -{ - m_feature_infos[feature_number].name = name; - m_feature_infos[feature_number].prefix = prefix; - m_feature_infos[feature_number].type = type; - m_feature_infos[feature_number].getter = getter; - - switch (type) - { - case FEAT_TYPE_PRED: - m_predicate_features.push_back(feature_number); - break; - case FEAT_TYPE_NODE_VS_PRED: - m_node_vs_predicate_features.push_back(feature_number); - break; - default: - break; - } -} - -int FeatureCollection::get_feature_number(const string &feature_name) -{ - // linear search for the given feature name - size_t feature_idx; - for (feature_idx=0; feature_idx(feature_idx); - } - else - { - throw runtime_error("Unknown feature name: " + feature_name); - } -} - -int FeatureCollection::get_feature_type(int feature_number) -{ - return m_feature_infos[feature_number].type; -} - -const FeatureFunction& FeatureCollection::get_feature_function(int feature_number) -{ - return m_feature_infos[feature_number].getter; -} - -const string FeatureCollection::get_feature_prefix(int feature_number) -{ - return m_feature_infos[feature_number].prefix; -} - -// impolementation for FeatureExtractor - -//new 
function -int FeatureExtractor::get_feature_number_for_extractor(const std::string &feature_name) -{ - return ms_feature_collection.get_feature_number(feature_name); -} -int FeatureExtractor::get_feature_type_for_extractor(int feature_number) -{ - return ms_feature_collection.get_feature_type(feature_number); -} -const FeatureFunction& FeatureExtractor::get_feature_function_for_extractor(int feature_number) -{ - return ms_feature_collection.get_feature_function(feature_number); -} -const std::vector& FeatureExtractor::get_predicate_features_for_extractor() -{ - return ms_feature_collection.get_predicate_features(); -} -const std::vector& FeatureExtractor::get_node_vs_predicate_features_for_extractor() -{ - return ms_feature_collection.get_node_vs_predicate_features(); -} -const std::string FeatureExtractor::get_feature_prefix_for_extractor(int feature_number) -{ - return ms_feature_collection.get_feature_prefix(feature_number); -} -void FeatureExtractor::clear_features() -{ - m_feature_extracted_flags.clear(); - m_feature_values.clear(); - m_feature_values.resize(TOTAL_FEATURE); - - m_node_features_extracted_flag = false; -} - -void FeatureExtractor::set_target_sentence(const Sentence &sentence) -{ - clear_features(); - mp_sentence = &sentence; - - size_t row_count = sentence.get_row_count(); - m_feature_extracted_flags.resize(row_count+1); -} - -void FeatureExtractor::set_feature_set_( - const std::vector& feature_set_str, - FeatureSet& feature_set) -{ - feature_set.clear(); - - set predicate_features; - set node_features; - set node_vs_predicate_features; - - for (size_t i=0; i &feature_set_str) -{ - set_feature_set_(feature_set_str, m_feature_set); -} - -const std::string& FeatureExtractor::get_feature_value_( - const int feature_number, - const size_t row) -{ - if (is_feature_empty_(feature_number, row)) - { - FeatureFunction function - = get_feature_function_for_extractor(feature_number); - - function(this, row); - } - - return 
get_feature_storage_(feature_number, row); -} - -void FeatureExtractor::set_feature_value_( - const int feature_number, - const size_t row, - const string& feature_value) -{ - get_feature_storage_(feature_number, row) = feature_value; - set_feature_empty_(feature_number, row, false); -} - -bool FeatureExtractor::is_feature_empty_(const int feature_number, const size_t row) -{ - int feature_type - = get_feature_type_for_extractor(feature_number); - - if (FEAT_TYPE_PRED == feature_type) - { - return !m_feature_extracted_flags[m_predicate_row][feature_number]; - } - else - { - return !m_feature_extracted_flags[row][feature_number]; - } -} - -void FeatureExtractor::set_feature_empty_( - const int feature_number, - const size_t row, - const bool empty) -{ - int feature_type - = get_feature_type_for_extractor(feature_number); - - if (FEAT_TYPE_PRED == feature_type) - { - m_feature_extracted_flags[m_predicate_row][feature_number] = !empty; - } - else - { - m_feature_extracted_flags[row][feature_number] = !empty; - } -} - -string& FeatureExtractor::get_feature_storage_( - const int feature_number, - const size_t row) -{ - const int feature_type - = get_feature_type_for_extractor(feature_number); - - switch (feature_type) - { - case FEAT_TYPE_PRED: - if (m_feature_values[feature_number].empty()) - { -// std::cout<<"hello"<get_row_count(); - m_feature_values[feature_number].resize(row_count+1); - } - return m_feature_values[feature_number][row]; - } -} - -void FeatureExtractor::calc_features(const size_t predicate_index) -{ - const Predicate &predicate - = mp_sentence->get_predicates()[predicate_index]; - - m_predicate_row = predicate.row; - - calc_features_(m_feature_set); -} - -void FeatureExtractor::calc_features_(const FeatureSet& feature_set) -{ - calc_predicate_features_(feature_set.for_predicate); - calc_node_vs_predicate_features_(feature_set.for_node_vs_predicate); - calc_node_features_(feature_set.for_node); -} - -void FeatureExtractor::calc_node_features() -{ - 
calc_node_features_(m_feature_set.for_node); -} - -void FeatureExtractor::calc_node_features_(const vector& node_features) -{ - if (m_node_features_extracted_flag) - { - return; - } - - const SRLTree& parse_tree = mp_sentence->get_parse_tree(); - typedef SRLTree::post_order_iterator PostIter; - for (PostIter node_iter = parse_tree.begin_post(); - node_iter != --parse_tree.end_post(); - ++node_iter) - { - for (size_t i=0; i& predicate_features) -{ - clear_predicate_features_(); - - for (size_t i = 0; i < predicate_features.size(); ++ i) { - int feature_number = predicate_features[i]; - get_feature_value_(feature_number, m_predicate_row); - } -} - -void FeatureExtractor::calc_node_vs_predicate_features_(const vector& node_vs_predicate_features) -{ - clear_node_vs_predicate_features_(); - - // prepare constants - const SRLTree& parse_tree = mp_sentence->get_parse_tree(); - const size_t row_count = mp_sentence->get_row_count(); - - // prepare for path calculation algorithm - get_feature_storage_(FEAT_PATH, m_predicate_row) - = mp_sentence->get_PPOS(m_predicate_row); - get_feature_storage_(FEAT_UP_PATH, m_predicate_row) - = string(); - get_feature_storage_(FEAT_REL_PATH, m_predicate_row) - = string(); - get_feature_storage_(FEAT_UP_REL_PATH, m_predicate_row) - = string(); - - vector node_visited_flags(row_count+1); - - // traversal begins at the predicate - queue nodes_queue; - SRLTree::iterator - node_iter = mp_sentence->get_node_of_row(m_predicate_row); - nodes_queue.push(node_iter); - - // traverse - while (!nodes_queue.empty()) - { - // fetch a node from the queue - node_iter = nodes_queue.front(); - nodes_queue.pop(); - - for (size_t i = 0; i < node_vs_predicate_features.size(); ++ i) { - int feature_number = node_vs_predicate_features[i]; - get_feature_value_(feature_number, *node_iter); - } - - node_visited_flags[*node_iter] = true; // visit; - - // add children to the queue - typedef SRLTree::sibling_iterator SiblingIter; - for (SiblingIter child_iter = 
node_iter.begin(); - child_iter != node_iter.end(); - ++child_iter) - { - if (!node_visited_flags[*child_iter]) - { - nodes_queue.push(child_iter); - } - } - - // add parent to queue - SRLTree::iterator parent = parse_tree.parent(node_iter); - if (parse_tree.is_valid(parent) && !node_visited_flags[*parent]) - { - nodes_queue.push(parent); - } - } -} - -void FeatureExtractor::clear_predicate_features_() -{ - const std::vector& payload = get_predicate_features_for_extractor(); - for (size_t i = 0; i < payload.size(); ++ i) { - int feature_number = payload[i]; - m_feature_extracted_flags[m_predicate_row][feature_number] = false; - m_feature_values[feature_number].clear(); - } -} - -void FeatureExtractor::clear_node_vs_predicate_features_() -{ - // clear empty flags - for (size_t row=1; row<=mp_sentence->get_row_count(); ++row) - { - const std::vector& payload = get_node_vs_predicate_features_for_extractor(); - for (size_t i = 0; i < payload.size(); ++ i) { - int feature_number = payload[i]; - m_feature_extracted_flags[row][feature_number] = false; - } - } - - // clear feature values - const std::vector& payload = get_node_vs_predicate_features_for_extractor(); - for (size_t i = 0; i < payload.size(); ++ i) { - int feature_number = payload[i]; - m_feature_values[feature_number].clear(); - } -} - -void FeatureExtractor::set_feature_set_by_file( - const string& config_file, - const Configuration &configuration, - vector >& com_features) -{ - ifstream config_stream(config_file.c_str()); - if (!config_stream) - { - throw runtime_error("FeatureExtractor: Error opening config file."); - } - - string line; - com_features.clear(); - vector >* p_features; - p_features = &com_features; - - while (getline(config_stream, line)) - { - if ('#' != line[0]) - p_features->push_back(split_(line)); - } - - // check features in config file belongs language configuration - const vector& features = configuration.get_pred_class_config().get_feature_names(); - 
check_feature_exist(com_features, features); - set_feature_set( - vct_vct_string2_vct_string(com_features) - ); -} - -void FeatureExtractor::get_feature_string_for_row( - const size_t predicate_row, - string &result, - const vector >& vct_vct_feature_names) -{ - stringstream row_features_stream; - for (size_t i=0; i & com_feature_names = vct_vct_feature_names[i]; - - bool first_part_flag = true; - for (size_t j=0; j& features_for_rows) -{ - features_for_rows.clear(); - features_for_rows.push_back(get_feature_storage_(feature_number, 0)); - - const size_t row_count = mp_sentence->get_row_count(); - for (size_t row=1; row<=row_count; ++row) // row id start at 1 - { - if (is_feature_empty_(feature_number, row)) - { - throw runtime_error("Specified feature_number is empty for row"); - } - - features_for_rows.push_back(get_feature_storage_(feature_number, row)); - } -} - -void FeatureExtractor::fg_basic_info_(const size_t row) -{ - const size_t headword_row = mp_sentence->get_PHEAD(row); - - // set feature values; - set_feature_value_(FEAT_DEPREL, row, mp_sentence->get_PDEPREL(row)); - set_feature_value_(FEAT_HEADWORD, row, mp_sentence->get_FORM(headword_row)); - set_feature_value_(FEAT_DEPWORD, row, mp_sentence->get_FORM(row)); - set_feature_value_(FEAT_HEADWORD_POS, row, mp_sentence->get_PPOS(headword_row)); - set_feature_value_(FEAT_DEPWORD_POS, row, mp_sentence->get_PPOS(row)); - set_feature_value_(FEAT_HEADWORD_LEMMA,row, mp_sentence->get_PLEMMA(headword_row)); - set_feature_value_(FEAT_DEPWORD_LEMMA, row, mp_sentence->get_PLEMMA(row)); - -} - -void FeatureExtractor::fg_constituent_(const size_t row) -{ - const SRLTree& parse_tree = mp_sentence->get_parse_tree(); - - typedef SRLTree::iterator Iter; - const Iter& node = mp_sentence->get_node_of_row(row); - - if (parse_tree.number_of_children(node)) - { - size_t begin = row, end = row; - for (Iter child = node.begin(); child != node.end(); ++child) - { - if (*child < begin) - { - begin = *child; - } - if (*child > 
end) - { - end = *child; - } - } - - const string& first_FORM = mp_sentence->get_FORM(begin); - const string& first_POS = mp_sentence->get_PPOS(begin); - const string& first_LEMMA = mp_sentence->get_PLEMMA(begin); - const string& last_FORM = mp_sentence->get_FORM(end); - const string& last_POS = mp_sentence->get_PPOS(end); - const string& last_LEMMA = mp_sentence->get_PLEMMA(end); - - set_feature_value_(FEAT_FIRST_WORD, row, first_FORM); - set_feature_value_(FEAT_FIRST_POS, row, first_POS); - set_feature_value_(FEAT_FIRST_LEMMA, row, first_LEMMA); - set_feature_value_(FEAT_LAST_WORD, row, last_FORM); - set_feature_value_(FEAT_LAST_POS, row, last_POS); - set_feature_value_(FEAT_LAST_LEMMA, row, last_LEMMA); - - if (begin == end) - { - set_feature_value_(FEAT_POS_PATTERN, row, first_POS); - throw runtime_error("Only leaf's begin == end"); - } - else - { - string POS_pattern; - POS_pattern = first_POS; - set inner_POS; - for (size_t i=begin+1; i < end; ++i) - { - inner_POS.insert(mp_sentence->get_PPOS(i)); - } - for (set::iterator iter = inner_POS.begin(); - iter != inner_POS.end(); - ++iter) - { - POS_pattern += "-"; - POS_pattern += *iter; - } - POS_pattern += "-"; - POS_pattern += last_POS; - set_feature_value_(FEAT_POS_PATTERN, row, POS_pattern); - } - } - else // leaf - { - const string& FORM = mp_sentence->get_FORM(row); - const string& POS = mp_sentence->get_PPOS(row); - const string& LEMMA = mp_sentence->get_PLEMMA(row); - - set_feature_value_(FEAT_FIRST_WORD, row, FORM); - set_feature_value_(FEAT_FIRST_POS, row, POS); - set_feature_value_(FEAT_FIRST_LEMMA, row, LEMMA); - set_feature_value_(FEAT_LAST_WORD, row, FORM); - set_feature_value_(FEAT_LAST_POS, row, POS); - set_feature_value_(FEAT_LAST_LEMMA, row, LEMMA); - set_feature_value_(FEAT_POS_PATTERN, row, POS); - } -} - -void FeatureExtractor::fg_children_pattern_(const size_t row) -{ - typedef SRLTree::sibling_iterator Iter; - Iter node_iter = mp_sentence->get_node_of_row(row); - - string children_pos; - 
string children_rel; - string children_pos_ndup; - string children_rel_ndup; - - string child_pos; - string child_rel; - string old_child_pos; - string old_child_rel; - - for (Iter child = node_iter.begin(); - child != node_iter.end(); - ++child) - { - child_pos = mp_sentence->get_PPOS(*child); - child_rel = mp_sentence->get_PDEPREL(*child); - - children_pos.append(child_pos); - children_pos.append("-"); - children_rel.append(child_rel); - children_rel.append("-"); - - if (child_pos != old_child_pos) - { - children_pos_ndup.append(child_pos); - children_pos_ndup.append("-"); - old_child_pos = child_pos; - } - if (child_rel != old_child_rel) - { - children_rel_ndup.append(child_rel); - children_rel_ndup.append("-"); - old_child_rel = child_rel; - } - } - - set_feature_value_(FEAT_CHD_POS, row, children_pos); - set_feature_value_(FEAT_CHD_REL, row, children_rel); - set_feature_value_(FEAT_CHD_POS_NDUP, row, children_pos_ndup); - set_feature_value_(FEAT_CHD_REL_NDUP, row, children_rel_ndup); -} - -void FeatureExtractor::fg_siblings_pattern_( const size_t row ) -{ - typedef SRLTree::sibling_iterator Iter; - const size_t parent_row = mp_sentence->get_PHEAD(row); - Iter parent_node = mp_sentence->get_node_of_row(parent_row); - - string siblings_pos; - string siblings_rel; - string siblings_pos_ndup; - string siblings_rel_ndup; - - string sibling_pos; - string sibling_rel; - string old_sibling_pos; - string old_sibling_rel; - - for (Iter sib = parent_node.begin(); - sib != parent_node.end(); - ++sib) - { - sibling_pos = mp_sentence->get_PPOS(*sib); - sibling_rel = mp_sentence->get_PDEPREL(*sib); - siblings_pos.append(sibling_pos); - siblings_pos.append("-"); - siblings_rel.append(sibling_rel); - siblings_rel.append("-"); - - if (sibling_pos != old_sibling_pos) { - siblings_pos_ndup.append(sibling_pos); - siblings_pos_ndup.append("-"); - old_sibling_pos = sibling_pos; - } - if (sibling_rel != old_sibling_rel) { - siblings_rel_ndup.append(sibling_rel); - 
siblings_rel_ndup.append("-"); - old_sibling_rel = sibling_rel; - } - } - - set_feature_value_(FEAT_SIB_POS, row, siblings_pos); - set_feature_value_(FEAT_SIB_REL, row, siblings_rel); - set_feature_value_(FEAT_SIB_POS_NDUP, row, siblings_pos_ndup); - set_feature_value_(FEAT_SIB_REL_NDUP, row, siblings_rel_ndup); -} - -void FeatureExtractor::fg_predicate_children_pattern_( const size_t row ) -{ - typedef SRLTree::sibling_iterator Iter; - Iter predicate_node = mp_sentence->get_node_of_row(m_predicate_row); - - string children_pos; - string children_rel; - string children_pos_ndup; - string children_rel_ndup; - - string child_pos; - string child_rel; - string old_child_pos; - string old_child_rel; - - for (Iter child = predicate_node.begin(); - child != predicate_node.end(); - ++child) - { - child_pos = mp_sentence->get_PPOS(*child); - child_rel = mp_sentence->get_PDEPREL(*child); - - children_pos.append(child_pos); - children_pos.append("-"); - children_rel.append(child_rel); - children_rel.append("-"); - - if (child_pos != old_child_pos) - { - children_pos_ndup.append(child_pos); - children_pos_ndup.append("-"); - old_child_pos = child_pos; - } - if (child_rel != old_child_rel) - { - children_rel_ndup.append(child_rel); - children_rel_ndup.append("-"); - old_child_rel = child_rel; - } - } - - set_feature_value_(FEAT_PRED_CHD_POS, m_predicate_row, children_pos); - set_feature_value_(FEAT_PRED_CHD_REL, m_predicate_row, children_rel); - set_feature_value_(FEAT_PRED_CHD_POS_NDUP, m_predicate_row, children_pos_ndup); - set_feature_value_(FEAT_PRED_CHD_REL_NDUP, m_predicate_row, children_rel_ndup); -} - -void FeatureExtractor::fg_predicate_siblings_pattern_(const size_t row) -{ - typedef SRLTree::sibling_iterator Iter; - const size_t parent_row = mp_sentence->get_PHEAD(m_predicate_row); - Iter parent_node = mp_sentence->get_node_of_row(parent_row); - - string siblings_pos; - string siblings_rel; - string siblings_pos_ndup; - string siblings_rel_ndup; - - string 
sibling_pos; - string sibling_rel; - string old_sibling_pos; - string old_sibling_rel; - - for (Iter sib = parent_node.begin(); - sib != parent_node.end(); - ++sib) - { - sibling_pos = mp_sentence->get_PPOS(*sib); - sibling_rel = mp_sentence->get_PDEPREL(*sib); - siblings_pos.append(sibling_pos); - siblings_pos.append("-"); - siblings_rel.append(sibling_rel); - siblings_rel.append("-"); - - if (sibling_pos != old_sibling_pos) - { - siblings_pos_ndup.append(sibling_pos); - siblings_pos_ndup.append("-"); - old_sibling_pos = sibling_pos; - } - if (sibling_rel != old_sibling_rel) - { - siblings_rel_ndup.append(sibling_rel); - siblings_rel_ndup.append("-"); - old_sibling_rel = sibling_rel; - } - } - - set_feature_value_(FEAT_PRED_SIB_POS, m_predicate_row, siblings_pos); - set_feature_value_(FEAT_PRED_SIB_REL, m_predicate_row, siblings_rel); - set_feature_value_(FEAT_PRED_SIB_POS_NDUP, m_predicate_row, siblings_pos_ndup); - set_feature_value_(FEAT_PRED_SIB_REL_NDUP, m_predicate_row, siblings_rel_ndup); -} - -void FeatureExtractor::fg_predicate_basic_( const size_t row ) -{ - set_feature_value_( - FEAT_PREDICATE, - m_predicate_row, - mp_sentence->get_FORM(m_predicate_row) - ); - - set_feature_value_( - FEAT_PRED_LEMMA, - m_predicate_row, - mp_sentence->get_PLEMMA(m_predicate_row) - ); - - set_feature_value_( - FEAT_PRED_SENSE, - m_predicate_row, - mp_sentence->get_PRED(m_predicate_row) - ); -} - -void FeatureExtractor::fg_path_(const size_t row) -{ - const SRLTree& parse_tree = mp_sentence->get_parse_tree(); - SRLTree::iterator node_iter = mp_sentence->get_node_of_row(row); - SRLTree::iterator parent = parse_tree.parent(node_iter); - - if (row) // skip ROOT (0 == row) - { - // HACK: detect whether the path feature of the parent node is set - const string &path = get_feature_storage_(FEAT_PATH, *parent); - if ("" == path) // parent not yet done, this node knows how to get to the predicate - { - if( row < *parent )//Left - { - get_feature_storage_(FEAT_PATH, *parent) - = 
mp_sentence->get_PPOS(*parent) - + "get_PDEPREL(row) - + get_feature_storage_(FEAT_REL_PATH, row); - } - else//Right - { - get_feature_storage_(FEAT_PATH, *parent) - = mp_sentence->get_PPOS(*parent) - + "get_PDEPREL(row) - + get_feature_storage_(FEAT_REL_PATH, row); - } - get_feature_storage_(FEAT_UP_PATH, *parent) - = get_feature_storage_(FEAT_UP_PATH, row); - - get_feature_storage_(FEAT_UP_REL_PATH, *parent) - = get_feature_storage_(FEAT_UP_REL_PATH, row); - } - else - { // parent path already got (parent knows the path to the predicate) - if(row < *parent)//Left - { - get_feature_storage_(FEAT_PATH, row) - = mp_sentence->get_PPOS(row) - + ">L#" - + get_feature_storage_(FEAT_PATH, *parent); - - get_feature_storage_(FEAT_UP_PATH, row) - = mp_sentence->get_PPOS(row) - + ">L#" - + get_feature_storage_(FEAT_UP_PATH, *parent); - - get_feature_storage_(FEAT_REL_PATH, row) - = mp_sentence->get_PDEPREL(row) - + ">L#" - + get_feature_storage_(FEAT_REL_PATH, *parent); - - get_feature_storage_(FEAT_UP_REL_PATH, row) - = mp_sentence->get_PDEPREL(row) - + ">L#" - + get_feature_storage_(FEAT_UP_REL_PATH, *parent); - } - else//Right - { - get_feature_storage_(FEAT_PATH, row) - = mp_sentence->get_PPOS(row) - + ">R#" - + get_feature_storage_(FEAT_PATH, *parent); - - get_feature_storage_(FEAT_UP_PATH, row) - = mp_sentence->get_PPOS(row) - + ">R#" - + get_feature_storage_(FEAT_UP_PATH, *parent); - - get_feature_storage_(FEAT_REL_PATH, row) - = mp_sentence->get_PDEPREL(row) - + ">R#" - + get_feature_storage_(FEAT_REL_PATH, *parent); - - get_feature_storage_(FEAT_UP_REL_PATH, row) - = mp_sentence->get_PDEPREL(row) - + ">R#" - + get_feature_storage_(FEAT_UP_REL_PATH, *parent); - } - - } - } - - set_feature_empty_(FEAT_PATH, row, false); - set_feature_empty_(FEAT_UP_PATH, row, false); - set_feature_empty_(FEAT_REL_PATH, row, false); - set_feature_empty_(FEAT_UP_REL_PATH, row, false); -} - -void FeatureExtractor::fg_path_length_(const size_t row) -{ - const std::string& path = 
get_feature_value_(FEAT_PATH, row); - const std::string& up_path = get_feature_value_(FEAT_UP_PATH, row); - - int up_path_len = std::count(path.begin(), path.end(), '>'); - int down_path_len = std::count(path.begin(), path.end(), '<'); - int path_length = up_path_len + down_path_len; - - get_feature_storage_(FEAT_PATH_LENGTH, row) = int2string(path_length); - get_feature_storage_(FEAT_UP_PATH_LEN, row) = int2string(up_path_len); - get_feature_storage_(FEAT_DOWN_PATH_LEN, row) = int2string(down_path_len); - - set_feature_empty_(FEAT_PATH_LENGTH, row, false); - set_feature_empty_(FEAT_UP_PATH_LEN, row, false); - set_feature_empty_(FEAT_DOWN_PATH_LEN, row, false); - - -/* if (row) // skip ROOT (0 == row) - { - const int parent_path_length - = string2int(get_feature_storage_(FEAT_PATH_LENGTH, *parent)); - - if ( parent_path_length == 0 && *parent != m_predicate_row) // parent not yet done, this node knows how to get to the predicate - { - get_feature_storage_(FEAT_PATH_LENGTH, *parent) = - int2string( - string2int(get_feature_storage_(FEAT_PATH_LENGTH, row))+1); - - get_feature_storage_(FEAT_UP_PATH_LEN, *parent) = - get_feature_storage_(FEAT_UP_PATH_LEN, row); - - get_feature_storage_(FEAT_DOWN_PATH_LEN, *parent) = - int2string( - string2int(get_feature_storage_(FEAT_DOWN_PATH_LEN, row)) + 1); - } - else // parent path length already got (parent knows the path length to the predicate) - { - get_feature_storage_(FEAT_PATH_LENGTH, row) = - int2string( - string2int(get_feature_storage_(FEAT_PATH_LENGTH, *parent))+1); - - get_feature_storage_(FEAT_UP_PATH_LEN, row) = - int2string( - string2int(get_feature_storage_(FEAT_UP_PATH_LEN, *parent))+1); - - get_feature_storage_(FEAT_DOWN_PATH_LEN, row) = - get_feature_storage_(FEAT_DOWN_PATH_LEN, *parent); - } - } - */ -} - -void FeatureExtractor::fg_descendant_of_predicate_( const size_t row ) -{ - const string& up_path_length - = get_feature_value_(FEAT_UP_PATH_LEN, row); - const string& down_path_length - = 
get_feature_value_(FEAT_DOWN_PATH_LEN, row); - - if ("0" == down_path_length && "0" != up_path_length) - { - set_feature_value_(FEAT_DESC_OF_PD, row, "1"); - } - else - { - set_feature_value_(FEAT_DESC_OF_PD, row, "0"); - } -} - -void FeatureExtractor::fg_position_(const size_t row) -{ - if (row <= m_predicate_row) - { - set_feature_value_(FEAT_POSITION, row, "before"); - } - else - { - set_feature_value_(FEAT_POSITION, row, "after"); - } -} - -void FeatureExtractor::fg_predicate_familyship_( const size_t row ) -{ - const string& up_path_length - = get_feature_value_(FEAT_UP_PATH_LEN, row); - const string& down_path_length - = get_feature_value_(FEAT_DOWN_PATH_LEN, row); - - string familyship; - - if ("0" == down_path_length) - { - if ("0" == up_path_length) - { - familyship = "self"; - } - else if ("1" == up_path_length) - { - familyship = "child"; - } - else - { - familyship = "descendant"; - } - } - else if ("0" == up_path_length) - { - if ("1" == down_path_length) - { - familyship = "parent"; - } - else - { - familyship = "ancestor"; - } - } - else if ("1" == up_path_length && "1" == down_path_length) - { - familyship = "sibling"; - } - else - { - familyship = "not-relative"; - } - - set_feature_value_(FEAT_PRED_FAMILYSHIP, row, familyship); - -} - -void FeatureExtractor::fg_predicate_bag_of_words_(const size_t row) -{ - const string& prefix = get_feature_prefix_for_extractor(FEAT_BAG_OF_WORD)+"@"; - const size_t row_count = mp_sentence->get_row_count(); - - string bag_of_words = "NONSENSE"; - - for (size_t i=1; iget_FORM(i); - } - bag_of_words += " "; - bag_of_words += prefix; - bag_of_words += mp_sentence->get_FORM(m_predicate_row); - for (size_t i=m_predicate_row+1; i<=row_count; ++i) { - bag_of_words += " "; - bag_of_words += prefix; - bag_of_words += mp_sentence->get_FORM(i); - } - - set_feature_value_(FEAT_BAG_OF_WORD, row, bag_of_words); - - string bag_of_words_add_des_of_pred = ""; - const string& new_prefix = 
get_feature_prefix_for_extractor(FEAT_BAG_OF_WORD_IS_DES_O_PRED)+"@"; - - for (size_t i=1; i<=row_count; ++i) - { - if (bag_of_words_add_des_of_pred != "") - { - bag_of_words_add_des_of_pred += " "; - bag_of_words_add_des_of_pred += new_prefix; - } - bag_of_words_add_des_of_pred += mp_sentence->get_FORM(i); - bag_of_words_add_des_of_pred += "_"; - bag_of_words_add_des_of_pred += get_feature_value_(FEAT_DESC_OF_PD, i); - } - - set_feature_value_(FEAT_BAG_OF_WORD_IS_DES_O_PRED, row, bag_of_words_add_des_of_pred); -} - -void FeatureExtractor::fg_predicate_bag_of_words_ordered_(const size_t row) -{ - const string& prefix =get_feature_prefix_for_extractor(FEAT_BAG_OF_WORD_O)+"@"; - const size_t row_count = mp_sentence->get_row_count(); - - string bag_of_words_o = "NONSENSE"; - - for (size_t i=1; iget_FORM(i); - bag_of_words_o += "_l"; - } - bag_of_words_o += " "; - bag_of_words_o += prefix; - bag_of_words_o += mp_sentence->get_FORM(m_predicate_row); - bag_of_words_o += "_t"; - - for (size_t i=m_predicate_row+1; i<=row_count; ++i) { - bag_of_words_o += " "; - bag_of_words_o += prefix; - bag_of_words_o += mp_sentence->get_FORM(i); - bag_of_words_o += "_r"; - } - - set_feature_value_(FEAT_BAG_OF_WORD_O, m_predicate_row, bag_of_words_o); -} - -void FeatureExtractor::fg_predicate_bag_of_POSs_ordered_(const size_t row) -{ - const string& prefix = get_feature_prefix_for_extractor(FEAT_BAG_OF_POS_O)+"@"; - const size_t row_count = mp_sentence->get_row_count(); - - string bag_of_POSs_o = "NONSENSE"; - - for (size_t i=1; iget_PPOS(i); - bag_of_POSs_o += "_l"; - } - bag_of_POSs_o += " "; - bag_of_POSs_o += prefix; - bag_of_POSs_o += mp_sentence->get_PPOS(m_predicate_row); - bag_of_POSs_o += "_t"; - - for (size_t i=m_predicate_row+1; i<=row_count; ++i) { - bag_of_POSs_o += " "; - bag_of_POSs_o += prefix; - bag_of_POSs_o += mp_sentence->get_PPOS(i); - bag_of_POSs_o += "_r"; - } - - set_feature_value_(FEAT_BAG_OF_POS_O, m_predicate_row, bag_of_POSs_o); - - string bag_of_POSs_o_w5 = 
""; - const string& w5_prefix = get_feature_prefix_for_extractor(FEAT_BAG_OF_POS_O_W5) + "@"; - const size_t wind_begin = (m_predicate_row-5>1 ? m_predicate_row-5 : 1); - const size_t wind_end = (m_predicate_row+5get_PPOS(i); - bag_of_POSs_o_w5 += "_l"; - } - if (bag_of_POSs_o_w5!= "") - { - bag_of_POSs_o_w5 += " "; - bag_of_POSs_o_w5 += w5_prefix; - } - bag_of_POSs_o_w5 += mp_sentence->get_PPOS(m_predicate_row); - bag_of_POSs_o_w5 += "_t"; - - for (size_t i=m_predicate_row+1; i<=wind_end; ++i) - { - bag_of_POSs_o_w5 += " "; - bag_of_POSs_o_w5 += w5_prefix; - bag_of_POSs_o_w5 += mp_sentence->get_PPOS(i); - bag_of_POSs_o_w5 += "_r"; - } - set_feature_value_(FEAT_BAG_OF_POS_O_W5, m_predicate_row, bag_of_POSs_o_w5); - -} -void FeatureExtractor::fg_predicate_bag_of_POSs_window5_(const size_t row) -{ - const string& prefix = get_feature_prefix_for_extractor(FEAT_BAG_OF_POS_WIND5)+ "@"; - const size_t row_count = mp_sentence->get_row_count(); - - string bag_of_POSs_window5 = ""; - const size_t wind_begin = (m_predicate_row-5>1 ? m_predicate_row-5 : 1); - const size_t wind_end = (m_predicate_row+5get_PPOS(i); - } - set_feature_value_(FEAT_BAG_OF_POS_WIND5, m_predicate_row, bag_of_POSs_window5); -} - -void FeatureExtractor::fg_predicate_bag_of_POSs_numbered_(const size_t row) -{ - const string& prefix = get_feature_prefix_for_extractor(FEAT_BAG_OF_POS_N)+"@"; - const size_t row_count = mp_sentence->get_row_count(); - - stringstream bag_of_POSs_n; - bag_of_POSs_n<<"NONSENSE"; - - for (size_t i=m_predicate_row-1; i>=1; --i) { - const int distance = int(i - m_predicate_row); - bag_of_POSs_n - <<" " - <get_PPOS(i) - <<"_" - <get_PPOS(m_predicate_row) - <<"_" - <<0; - for (size_t i=m_predicate_row+1; i<=row_count; ++i) { - const int distance = int(i - m_predicate_row); - bag_of_POSs_n - <<" " - <get_PPOS(i) - <<"_" - <1 ? 
m_predicate_row-5 : 1); - const size_t wind_end = (m_predicate_row+5= wind_begin; --i) - { - const int distance = int(i-m_predicate_row); - if (visit) - { - bag_of_POSs_n_w5 - <<" "<get_PPOS(i)<<"_"<get_PPOS(m_predicate_row)<<"_"<<0; - - for (size_t i=m_predicate_row+1; i<=wind_end; ++i) - { - const int distance = int(i-m_predicate_row); - bag_of_POSs_n_w5<<" "<get_PPOS(i)<<"_"<get_row_count(); - - string wind5_bigram = "NONSENSE"; - - const size_t wind_begin = (m_predicate_row-5>1 ? m_predicate_row-5 : 1); - const size_t wind_end = (m_predicate_row+5get_FORM(i); - wind5_bigram += "_"; - wind5_bigram += mp_sentence->get_FORM(i+1); - } - - set_feature_value_(FEAT_WIND5_BIGRAM, m_predicate_row, wind5_bigram); - - const string& pos_prefix = get_feature_prefix_for_extractor(FEAT_WIND5_BIGRAM_POS)+"@"; - string wind5_bigram_pos = ""; - for (size_t i=wind_begin; iget_PPOS(i); - wind5_bigram_pos +="_"; - wind5_bigram_pos +=mp_sentence->get_PPOS(i+1); - } - - set_feature_value_(FEAT_WIND5_BIGRAM_POS, m_predicate_row, wind5_bigram_pos); -} - - -void FeatureExtractor::fg_verb_voice_en_(const size_t row) -{ - const string& PPOS = mp_sentence->get_PPOS(row); - const string& LEMMA = mp_sentence->get_PLEMMA(row); - - if (!m_configuration.is_verbPOS(PPOS)) - { - set_feature_value_(FEAT_VERB_VOICE_EN, row, "NON_VERB"); - } - else if ( ("VBN" == PPOS || "VBD" == PPOS) - && - ("be" == get_feature_value_(FEAT_HEADWORD_LEMMA, row) - || "get" == get_feature_value_(FEAT_HEADWORD_LEMMA, row) - || "APPO" == get_feature_value_(FEAT_DEPREL, row)) - ) - { - set_feature_value_(FEAT_VERB_VOICE_EN, row, "PASSIVE"); - } - else - { - set_feature_value_(FEAT_VERB_VOICE_EN, row, "ACTIVE"); - } -} - -void FeatureExtractor::fg_predicate_voice_en_(const size_t row) -{ - set_feature_value_( - FEAT_PRED_VOICE_EN, - row, - get_feature_value_(FEAT_VERB_VOICE_EN, m_predicate_row) - ); -} - -void FeatureExtractor::fg_feat_column(const size_t row) -{ - const string& pfeat = mp_sentence->get_PFEAT(row); - if 
(pfeat == "_") - { - throw runtime_error("feat_column function cannot calc the pfeat column is empty"); - } - map feat_res = split_feat_(pfeat); - - if (feat_res.find("SubPOS") != feat_res.end()) - { - set_feature_value_(FEAT_SUB_POS, row, feat_res["SubPOS"]); - } - else - { - set_feature_value_(FEAT_SUB_POS, row, ""); - } -} - -void FeatureExtractor::fg_pfeat_column_(const size_t row) -{ - const string& pfeat = mp_sentence->get_PFEAT(row); - if ("_" == pfeat) - { - set_feature_value_(FEAT_PFEAT_COLUMN, row, ""); - set_feature_value_(FEAT_PFEAT_EXC_NULL, row, ""); - return; - } - string prefix = get_feature_prefix_for_extractor(FEAT_PFEAT_COLUMN)+"@"; - - string prefix_exc_null = get_feature_prefix_for_extractor(FEAT_PFEAT_EXC_NULL)+"@"; - - vector result = split_(pfeat, '|'); - sort(result.begin(), result.end()); - string pfeat_str = ""; - string pfeat_exc_null = ""; - - string last_res; - if (result.size() > 0) - { - last_res = result[0]; - pfeat_str+=last_res; - pfeat_exc_null+=last_res; - } - for (size_t i=1; iget_PFEAT(row); - set_feature_value_(FEAT_PFEAT, row, pfeat); -} - diff --git a/src/srl/FeatureExtractor.h b/src/srl/FeatureExtractor.h deleted file mode 100644 index 19aa0168a..000000000 --- a/src/srl/FeatureExtractor.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * File Name : FeatureExtractor.h - * Author : msmouse - * Create Time : 2006-12-31 - * Project Name : NewSRLBaseLine - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - */ - - -#ifndef _FEATURE_EXTRACTOR_H_ -#define _FEATURE_EXTRACTOR_H_ - -#include "boost/function.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include "Sentence.h" -#include "Configuration.h" - -class FeatureExtractor; - -/* a boost::function is a wraper for either a function pointer or function - * object with the specified interface - * a FeatureFunction is a member function of FeatureExtractor, with a parameter - * of the type size_t (the row number in a sentence) - */ -typedef 
boost::function FeatureFunction; - - -// type of a feature -enum FEAT_TYPE -{ - FEAT_TYPE_PRED, // predicate feature (related to the predicate itself only) - FEAT_TYPE_NODE, // predicate-independent feature (related to a node only) - FEAT_TYPE_NODE_VS_PRED, /* predicate-dependent feature - * (related to the relationship between the node and the predicate) - */ - FEAT_TYPE_UNKNOWN // unknown feature type, usually not used, usually causing a exception -}; - - -// feature numbers, each relating to a feature name, used internally, for the sake of efficiency -enum FEAT_NUM -{ - FEAT_DEPREL, // the dep-relation name - FEAT_HEADWORD_POS, // head word POS - FEAT_DEPWORD_POS, // dep word POS - FEAT_HEADWORD, // headword - FEAT_DEPWORD, // depword - FEAT_HEADWORD_LEMMA, // head word lemma - FEAT_DEPWORD_LEMMA, // dep word lemma - FEAT_FIRST_WORD, // first word in the subtree - FEAT_FIRST_POS, // first POS in the subtree - FEAT_FIRST_LEMMA, // first word lemma - FEAT_LAST_WORD, // last word in the subtree - FEAT_LAST_POS, // last POS in the subtree - FEAT_LAST_LEMMA, // last word lemma - FEAT_POS_PATTERN, - // first-pos + inner POS's (duplicated reduced) + last-pos - // see hjliu's BegEndPosPattern in the paper - FEAT_CHD_POS, // pos pattern for children - FEAT_CHD_POS_NDUP, // (no duplicate) - FEAT_CHD_REL, // relation pattern for children - FEAT_CHD_REL_NDUP, // (no duplicate) - FEAT_SIB_POS, // pos pattern for siblings - FEAT_SIB_POS_NDUP, // (no duplicate) - FEAT_SIB_REL, // relation pattern for siblings - FEAT_SIB_REL_NDUP, // (no duplicate) - - FEAT_HAS_SV, // whether has a Support Verb - FEAT_PRED_CHD_POS, // pos pattern for predicate children - FEAT_PRED_CHD_POS_NDUP, // (no duplicate) - FEAT_PRED_CHD_REL, // relation pattern for predicate children - FEAT_PRED_CHD_REL_NDUP, // (no duplicate) - FEAT_PRED_SIB_POS, // pos pattern for predicate siblings - FEAT_PRED_SIB_POS_NDUP, // (no duplicate) - FEAT_PRED_SIB_REL, // relation pattern for predicate siblings - 
FEAT_PRED_SIB_REL_NDUP, // (no duplicate) - FEAT_PRED_LEMMA, // predicate lemma - FEAT_PREDICATE, // predicate itself - FEAT_PRED_SENSE, // predicate lemma + sense - - - FEAT_PATH, // the path from the node to the predicate - FEAT_UP_PATH, // the path from node to common parent - FEAT_REL_PATH, // relations along the path - FEAT_UP_REL_PATH, // relations along the half path - FEAT_PATH_LENGTH, // length of the feature "path" - FEAT_UP_PATH_LEN, // - FEAT_DOWN_PATH_LEN, // - FEAT_DESC_OF_PD, // whether is a descendant of the predicate - FEAT_POSITION, // before or after the predicate - FEAT_PRED_FAMILYSHIP, // parent/child/sibling of the predicate - - // new features for predicate sense - FEAT_BAG_OF_WORD, // all words in the sentence (multiple features) - FEAT_BAG_OF_WORD_O, // all words with left/target/right suffix - FEAT_BAG_OF_POS_O, // all POS's with numbered suffix - FEAT_BAG_OF_POS_N, // all POS's with left/target/right suffix - FEAT_WIND5_BIGRAM, // bigrams in the context window (5 word each side) - FEAT_WIND5_BIGRAM_POS, - FEAT_BAG_OF_POS_WIND5, - FEAT_BAG_OF_POS_O_W5, - FEAT_BAG_OF_POS_N_W5, - FEAT_BAG_OF_WORD_IS_DES_O_PRED, - - FEAT_VERB_VOICE_EN, - FEAT_PRED_VOICE_EN, - - FEAT_SUB_POS, - FEAT_PFEAT_COLUMN, - FEAT_PFEAT_EXC_NULL, - FEAT_PFEAT, - - FEAT_NODE_V_PRED, // there's a verb between node and predicate - - /* FEAT_VERB_VOICE, // verb voice (for nouns are "NONVERB") - * FEAT_PRED_VOICE, // the voice of verb predicate (for PRED_NOUN's are "NONVERB") - */ - - TOTAL_FEATURE, // total feature number -}; - -/* Auxiliary class for FeatureExtractor, holding information for the features - * all FeatureExtractor objects hold one common non-static FeatureCollection, for - * looking up feature informations (such as feature names, feature prefix, etc) - */ -class FeatureCollection -{ - public: - /* constructor, register features, record their feature number, - * feature name, feature prefix, feature type, etc for later looking up - */ - FeatureCollection(); - 
- /* get the feature number for a given feature name - */ - int get_feature_number(const std::string &feature_name); - - /* get the type of a given feature number - */ - int get_feature_type(int feature_number); - - /* get the feature extraction function object of a given feature number - */ - const FeatureFunction& get_feature_function(int feature_number); - - /* get the feature prefix for output of a given feature number - */ - const std::string get_feature_prefix(int feature_number); - - /* get predicate feature number list - */ - const std::vector& get_predicate_features() - { - return m_predicate_features; - } - - /* get predicate feature number list - */ - const std::vector& get_node_vs_predicate_features() - { - return m_node_vs_predicate_features; - } - - private: - struct FeatureInfo - { - std::string name; - std::string prefix; - FEAT_TYPE type; - FeatureFunction getter; // see FeatureFunction typedef - }; - - private: - /* register informations for a feature, invoked in the constructor - */ - void add_feature_( - FEAT_NUM feature_number, - FEAT_TYPE type, - const std::string& name, - const std::string& prefix, - const FeatureFunction& getter); - - private: - std::vector m_feature_infos; - std::vector m_predicate_features; - std::vector m_node_vs_predicate_features; - -}; - -struct FeatureSet -{ - std::vector for_predicate; - std::vector for_node; - std::vector for_node_vs_predicate; - - void clear() - { - for_predicate.clear(); - for_node.clear(); - for_node_vs_predicate.clear(); - } -}; - -class FeatureExtractor -{ - public: - explicit FeatureExtractor(const Configuration& config) - { - set_feature_set(config.get_argu_config().get_feature_names()); - m_configuration = config; - } - - /* set the sentence from which features are extracted - */ - void set_target_sentence(const Sentence &sentence); - - /* calculate all features in the feature set - */ - void calc_features(const size_t predicate_index); - void calc_node_features(); - - void 
get_feature_for_rows( - int feature_number, - std::vector& features_for_rows); - - void set_feature_set(const std::vector& feature_set_str); - - void clear_features(); - - /* used for predicate sense - */ - void set_feature_set_by_file( - const std::string& config_file, - const Configuration& configuration, - std::vector >& com_features); - - void get_feature_string_for_row( - const size_t predicate_row, - std::string &result, - const std::vector >& m_vct_vct_feature_names); - //new function - int get_feature_number_for_extractor(const std::string &feature_name); - int get_feature_type_for_extractor(int feature_number); - const FeatureFunction& get_feature_function_for_extractor(int feature_number); - const std::vector& get_predicate_features_for_extractor(); - const std::vector& get_node_vs_predicate_features_for_extractor(); - const std::string get_feature_prefix_for_extractor(int feature_number); - - - private: - /* Get single feature for specific row - * if not yet calculated, do it immediately - */ - const std::string& get_feature_value_(const int feature_number, const size_t row); - - void set_feature_value_(const int feature_number, const size_t row, const std::string& feature_value); - - /* whether a specified feature for specified row is empty - */ - bool is_feature_empty_(const int feature_number, const size_t row); - - void set_feature_empty_(const int feature_number, const size_t row, const bool empty); - - void set_feature_set_( - const std::vector& feature_set_str, - FeatureSet& feature_set); - - std::string& get_feature_storage_(const int feature_number, const size_t row); - - void calc_features_(const FeatureSet& feature_set); - - void calc_node_features_(const std::vector& node_features); - - void calc_predicate_features_(const std::vector& predicate_features); - - void calc_node_vs_predicate_features_(const std::vector& node_vs_predicate_features); - - void clear_predicate_features_(); - void clear_node_vs_predicate_features_(); - - int 
string2int(const std::string& str) - { - std::istringstream in_stream(str); - size_t res; - in_stream>>res; - return res; - } - - std::string int2string(const int num) - { - std::ostringstream out_stream; - out_stream< split_(std::string line, char s='+') - { - replace(line.begin(), line.end(), s, ' '); - std::istringstream istr(line); - std::vector res; - std::string tmp_str; - while (istr>>tmp_str) - { - res.push_back(tmp_str); - } - return res; - } - - std::map split_feat_(std::string line) - { - replace(line.begin(), line.end(), '|', ' '); - std::istringstream istr(line); - std::map res; - std::string tmp_str; - while (istr>>tmp_str) - { - size_t find = tmp_str.find("="); - assert(std::string::npos != find); - std::string word = tmp_str.substr(0, find); - std::string value = tmp_str.substr(find+1); - res[word] = value; - } - return res; - } - - void check_feature_exist( - const std::vector >& com_features, - const std::vector& feature_set) - { - for (size_t i=0; i vct_vct_string2_vct_string( - const std::vector >& feature_set) - { - std::vector res; - for (size_t i=0; i > m_feature_values; - - // flag for whether a feature is already calculated for specific row - std::vector > m_feature_extracted_flags; - - // Configuration - Configuration m_configuration; - - private: - void fg_basic_info_(const size_t row); - void fg_constituent_(const size_t row); - void fg_children_pattern_(const size_t row); - void fg_siblings_pattern_(const size_t row); - // void fg_has_support_verb_(const size_t row); - void fg_predicate_children_pattern_(const size_t row); - void fg_predicate_siblings_pattern_(const size_t row); - void fg_predicate_basic_(const size_t row); - void fg_path_(const size_t row); - void fg_path_length_(const size_t row); - void fg_descendant_of_predicate_(const size_t row); - void fg_position_(const size_t row); - void fg_predicate_familyship_(const size_t row); - void fg_predicate_bag_of_words_(const size_t row); - void 
fg_predicate_bag_of_words_ordered_(const size_t row); - void fg_predicate_bag_of_POSs_ordered_(const size_t row); - void fg_predicate_bag_of_POSs_numbered_(const size_t row); - void fg_predicate_window5_bigram_(const size_t row); - - void fg_verb_voice_en_(const size_t row); - void fg_predicate_voice_en_(const size_t row); - void fg_feat_column(const size_t row); - void fg_predicate_bag_of_POSs_window5_(const size_t row); - void fg_pfeat_column_(const size_t row); - void fg_pfeat_(const size_t row); - -}; - - -#endif - diff --git a/src/srl/GetInstance.cpp b/src/srl/GetInstance.cpp deleted file mode 100644 index 2037915c3..000000000 --- a/src/srl/GetInstance.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * File Name : GetInstance.cpp - * Author : msmouse - * Create Time : 2006-12-31 - * Project Name : NewSRLBaseLine - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - */ - - -#include "GetInstance.h" - -using namespace std; - -void GetInstance::generate_argu_instance( - const string& feature_folder, - const string& select_config, - const string& instance_file, - bool is_devel) -{ - open_select_config(select_config); - close_(); - test_feature_set_(feature_folder); - - // open - ofstream inst_stream(instance_file.c_str()); - - if (!inst_stream) - { - throw runtime_error("instance file cannot open"); - } - - vector values; - - string tmp; - tmp = feature_folder + "/labels"; - m_label_stream.open(tmp.c_str()); - if (! m_label_stream ) - { - throw runtime_error(feature_folder+"/labels cannot open"); - } - - // output - while ( getline(m_label_stream, tmp) ) - { - if (tmp == "") - { - read_line_(values); - } - else - { - read_line_(values); - if (! 
is_devel) - { - inst_stream << tmp << " "; - } - output_(inst_stream, values, m_select_features); - } - } - - inst_stream.close(); -} - -void GetInstance::output_(ofstream& out_stream, - const vector& values, - const vector >& select_features) -{ - for (size_t i=0; i& com_feature = select_features[i]; - if (0 != i) - { - out_stream<<" "; - } - for (size_t j=0; j vec_str; - replace(line.begin(), line.end(), '+', ' '); - istringstream istr(line); - string temp_str; - while (istr>>temp_str) - { - vec_str.push_back(temp_str); - } - - m_select_features.push_back(vec_str); - } - } - conf_input.close(); -} - -void GetInstance::close_() -{ - for (size_t i=0; i &values) -{ - if (values.size() < TOTAL_FEATURE) - { - values.resize(TOTAL_FEATURE); - } - - for (size_t feature_number =0; feature_number >& select_features, - const vector& features, - const string& feature_folder) -{ - // test the feature in select_config file exist in the language configruation - m_opened_flags.resize(TOTAL_FEATURE, false); - for (size_t i=0; i & com_feature = select_features[i]; - - for (size_t j=0; j -#include -#include -#include "Configuration.h" -#include "FeatureExtractor.h" - -class GetInstance -{ - public: - explicit GetInstance(const Configuration& configuration) - : m_configuration(configuration) - { - m_opened_flags.resize(TOTAL_FEATURE, false); - } - - - void generate_argu_instance( - const std::string& feature_folder, - const std::string& select_config, - const std::string& instance_file, - bool is_devel=false); - - private: - void open_select_config(const std::string& select_config); - void close_(); - void read_line_(std::vector& values); - void test_and_open_( - const std::vector >& select_features, - const std::vector& features, - const std::string& feature_folder); - - void test_feature_set_(const std::string& feature_folder) - { - test_and_open_( - m_select_features, - m_configuration.get_argu_config().get_feature_names(), - feature_folder); - } - - void output_( - 
std::ofstream& out_stream, - const std::vector &values, - const std::vector >& select_features); - - private: - GetInstance(const GetInstance &); - GetInstance & operator=(const GetInstance &); - - private: - Configuration m_configuration; - std::ifstream m_input_streams[TOTAL_FEATURE]; - std::ifstream m_label_stream; - FeatureCollection m_feature_collection; - std::vector m_opened_flags; - std::vector > m_select_features; -}; - -#endif - diff --git a/src/srl/MyStruct.h b/src/srl/MyStruct.h deleted file mode 100644 index db9690f2d..000000000 --- a/src/srl/MyStruct.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * File Name : MyStruct.h - * Author : Frumes - * - * Create Time : 20061231 - * Project Name NewSRLBaseLine - * Remark : define some stuctures used in the project - * - */ - -#ifndef _MY_STRUCT_ -#define _MY_STRUCT_ -#pragma warning ( disable : 4786 ) - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -/*----------------- typedef define begin-------------------------------------*/ -typedef pair ArgPos; //arg position: (begin,end) -typedef vector< ArgPos > VecPosForVerb; //the args position for current predicate -typedef vector< VecPosForVerb > VecPosForSent; //for current sentence - -typedef vector VecFeatForCons; //the all features for dependency node -typedef vector< VecFeatForCons > VecFeatForVerb; //for predicate -typedef vector< VecFeatForVerb > VecFeatForSent; //for sentence - -typedef pair ArgInfo; //the arg format: arg_type,arg_position - -typedef map > MapSentArg; -/*----------------- typedef define end --------------------------------------*/ - - -/*------------- typedef define begin ----------*/ -typedef struct LTPData -{ - vector vecParent; - vector vecWord; - vector vecPos; - vector vecNe; - vector vecRelation; -} LTPData; - -typedef struct DepNode -{ - int id; - int parent; - deque dequeChildren; - pair constituent; //the begin and end of the arg candidate - string relation; -} DepNode; - -typedef 
struct DepTree -{ - int nodeNum; - vector vecDepNode; -} DepTree; -/*------------- typedef define begin -----------*/ - - -/*------------ fileName struct ----------*/ -typedef struct FileNameStruct -{ - string m_strSRLConfFileName; - string m_strSRLDicFileName; - string m_strFeaturesFileName; - string m_strPositionsFileName; - string m_strPredicatesFileName; - string m_strWordsFileName; - string m_strDataTextFileName; - string m_strPredictFileName; -} FileNameStruct; -/*------------ fileName struct ----------*/ - -/*------------ fileStream struct --------*/ -typedef struct FileStreamStruct -{ - ofstream outFeaturesFile; - ofstream outPositionsFile; - ofstream outPredicatesFile; - ofstream outWordsFile; - ofstream outDataTextFile; - - ifstream inPredictFile; -} FileStreamStruct; -/*------------ fileStream struct --------*/ - - -#endif - diff --git a/src/srl/MyTree.cpp b/src/srl/MyTree.cpp deleted file mode 100644 index 3ed30c92b..000000000 --- a/src/srl/MyTree.cpp +++ /dev/null @@ -1,669 +0,0 @@ -/* - * File Name : MyTree.cpp - * Author : Frumes, hjliu - * - * Create Time : 20061231 - * Project Name NewSRLBaseLine - * - */ -#include -#include -#include "MyTree.h" - -using namespace std; - -MyTree::MyTree(const LTPData* ltpData) -{ - BuildDepTree(ltpData); -} - -MyTree::~MyTree() -{ - ClearTree(); -} - -int MyTree::GetRootID() const -{ - return m_rootID; -} - -// The interface: return the depNode with index nodeID -void MyTree::GetNodeValue(DepNode& depNode, - int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - depNode = m_depTree.vecDepNode.at(nodeID); -} - - -// Left child: the left child but near to the current node -int MyTree::GetLeftChild(const int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - - deque dequeChildren; - deque::iterator itChildren; - int leftChild = I_NULL_ID; - - DepNode depNode; - GetNodeValue(depNode, nodeID); - dequeChildren = depNode.dequeChildren; - itChildren = dequeChildren.begin(); 
- while(itChildren != dequeChildren.end()) - { - if(*itChildren < nodeID) - { - leftChild = *itChildren; - } - else // child node id greater than the parent id - { - break; - } - - ++ itChildren; - } - - return leftChild; -} - -// Right child: the right child but near to the current node -int MyTree::GetRightChild(const int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - - deque dequeChildren; - deque::iterator itChildren; - int rightChild = I_NULL_RIGHT; - - DepNode depNode; - GetNodeValue(depNode, nodeID); - dequeChildren = depNode.dequeChildren; - itChildren = dequeChildren.begin(); - while(itChildren != dequeChildren.end()) - { - if(*itChildren > nodeID) - { - //greater than parent node id - rightChild = *itChildren; - break; - } - - ++ itChildren; - } - - return rightChild; -} - -int MyTree::GetLeftSib(const int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - - int leftID = I_NULL_ID; - DepNode depNode; - GetNodeValue(depNode, nodeID); - - int parentID = depNode.parent; - if(parentID < 0) - { //process punctuation or root node - return leftID; - } - - GetNodeValue(depNode, parentID); - deque dequeChildren = depNode.dequeChildren; - deque::iterator itDequeChildren; - - itDequeChildren = dequeChildren.begin(); - while(itDequeChildren != dequeChildren.end()) - { - if(*itDequeChildren < nodeID) - { - leftID = *itDequeChildren; - } - else - { - break; - } - - ++ itDequeChildren; - } - - return leftID; -} - -int MyTree::GetRightSib(const int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - - int rightID = I_NULL_RIGHT; - DepNode depNode; - GetNodeValue(depNode, nodeID); - - int parentID = depNode.parent; - if(parentID < 0) - { //process punctuation or root node - return rightID; - } - - GetNodeValue(depNode, parentID); - deque dequeChildren = depNode.dequeChildren; - deque::iterator itDequeChildren; - - itDequeChildren = dequeChildren.begin(); - while(itDequeChildren != 
dequeChildren.end()) - { - if(*itDequeChildren > nodeID) - { - rightID = *itDequeChildren; - break; - } - ++ itDequeChildren; - } - - return rightID; -} - -void MyTree::GetAllSibs( - const int nodeID, - deque& dequeSibs) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - - DepNode depNode; - GetNodeValue(depNode, nodeID); - - int parentID = depNode.parent; - if(parentID < 0) - { //punctuation or root - return; - } - - GetNodeValue(depNode, parentID); - dequeSibs = depNode.dequeChildren; - - //delete the current node - deque::iterator itDequeSibs; - itDequeSibs = std::find(dequeSibs.begin(), dequeSibs.end(), nodeID); - if (itDequeSibs != dequeSibs.end()) - { - dequeSibs.erase(itDequeSibs); - } -} - -// Set the path feature of every node for current predicate -void MyTree::GetAllNodePath( - const int intCurPdID, - vector& vecPath) const -{ - assert((intCurPdID < m_depTree.nodeNum) && (intCurPdID >= 0)); - - string strRootPath; - string strCurRel; - int intCurNodeID; - int intParentID; - - //initial the path and predicate path - char str[16]; - vecPath.clear(); - vecPath.resize(m_depTree.nodeNum, S_NULL_STR); - - // itoa(intCurPdID, str, I_RADIX); //pd node: intCurPdID - sprintf(str, "%d", intCurPdID); - vecPath.at(intCurPdID) = str; - strRootPath = str; - - //get the root path and update the path from pd to root - string strCur; - intCurNodeID = intCurPdID; - intParentID = intCurPdID; - // while(!IsRoot(intParentID)) - while(1) - { //the predicate may not be punctuation - intParentID = m_depTree.vecDepNode.at(intCurNodeID).parent; - if(intParentID < 0){ - intParentID = intCurNodeID; - break; - } - intCurNodeID = intParentID; - // itoa(intCurNodeID, str, I_RADIX); - sprintf(str, "%d", intCurNodeID); - - strCur = str; - strRootPath = strCur + S_PATH_DOWN +strRootPath; - vecPath.at(intCurNodeID) = strRootPath; - } - vecPath.at(intParentID) = strRootPath; //the intParentID is RootID - - //visit the tree using DWS(Width First Search) - queue queDepNode; - 
deque dequeChildren; - deque::iterator itDequeChildren; - string strParentPath; - string strCurNodePath; - - //get the children of root, and push them to the queue - dequeChildren = m_depTree.vecDepNode.at(intParentID).dequeChildren; - itDequeChildren = dequeChildren.begin(); - while(itDequeChildren != dequeChildren.end()) - { - queDepNode.push(*itDequeChildren); - ++ itDequeChildren; - } - - while(!queDepNode.empty()) - { - //pop the front element of the queue - intCurNodeID = queDepNode.front(); - queDepNode.pop(); - - //check whether current node is along the path: from pd to root - //if no, update the current node path - if(!vecPath.at(intCurNodeID).compare(S_NULL_STR)) - { - intParentID = m_depTree.vecDepNode.at(intCurNodeID).parent; - strParentPath = vecPath.at(intParentID); - // itoa(intCurNodeID, str, I_RADIX); - sprintf(str, "%d", intCurNodeID); - - strCur = str; - strCurNodePath = strCur + S_PATH_UP + strParentPath; - vecPath.at(intCurNodeID) = strCurNodePath; - } - - dequeChildren = m_depTree.vecDepNode.at(intCurNodeID).dequeChildren; - itDequeChildren = dequeChildren.begin(); - while(itDequeChildren != dequeChildren.end()) - { - queDepNode.push(*itDequeChildren); - ++ itDequeChildren; - } - } -} - -// Get the familyship of nodeID1 and nodeID2 -void MyTree::GetFamilyShip( - string& strFShip, - int nodeID1, - int nodeID2) const -{ - assert((nodeID1 < m_depTree.nodeNum) && (nodeID1 >= 0)); - assert((nodeID2 < m_depTree.nodeNum) && (nodeID2 >= 0)); - - if(IsParent(nodeID1, nodeID2)) - { - strFShip = S_FMS_PARENT; - } - else if(IsChild(nodeID1, nodeID2)) - { - strFShip = S_FMS_CHILD; - } - else if(IsSibling(nodeID1, nodeID2)) - { - strFShip = S_FMS_SIBLING; - } - else if(IsAncestor(nodeID1, nodeID2)) - { - strFShip = S_FMS_ANCESTOR; - } - else if(IsPosterity(nodeID1, nodeID2)) - { - strFShip = S_FMS_POSTERITY; - } - else - { - strFShip = S_FMS_OTHER; - } -} - -// Get the recent common parent -int MyTree::GetRCParent(int nodeID1, int nodeID2) const -{ - 
assert((nodeID1 < m_depTree.nodeNum) && (nodeID1 >= 0)); - assert((nodeID2 < m_depTree.nodeNum) && (nodeID2 >= 0)); - - //if nodeID1 or nodeID2 is punctuation - if ( (m_depTree.vecDepNode.at(nodeID1).parent == I_PUN_PARENT_ID) || - (m_depTree.vecDepNode.at(nodeID2).parent == I_PUN_PARENT_ID) ) - { - return I_NULL_RCP; - } - - int high1 = 0; - int high2 = 0; - int parent1 = nodeID1; - int parent2 = nodeID2; - - //calculate the high of nodeID1 and nodeID2 - while (!IsRoot(parent1)) - { - parent1 = m_depTree.vecDepNode.at(parent1).parent; - high1++; - } - while (!IsRoot(parent2)) - { - parent2 = m_depTree.vecDepNode.at(parent2).parent; - high2++; - } - - //move low node above - parent1 = nodeID1; - parent2 = nodeID2; - if (high1 > high2) - { - for(int i = 0; i < (high1 - high2); i++) - { - parent1 = m_depTree.vecDepNode.at(parent1).parent; - } - } - else - { - for(int i = 0; i < (high2 - high1); i++) - { - parent2 = m_depTree.vecDepNode.at(parent2).parent; - } - } - - //move tow node together - while (parent1 != parent2) - { - parent1 = m_depTree.vecDepNode.at(parent1).parent; - parent2 = m_depTree.vecDepNode.at(parent2).parent; - } - - return parent1; - -} - -bool MyTree::IsRoot(const int nodeID) const -{ - return m_rootID == nodeID; -} - -bool MyTree::IsLeaf(const int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - - if(m_depTree.vecDepNode.at(nodeID).dequeChildren.empty()) - { - return 1; - } - else - { - return 0; - } -} - -// The interface, build the depTree using parent and relation information -bool MyTree::BuildDepTree(const LTPData* ltpData) -{ - InitTree(ltpData); - - return UpdateTree(); -} - -// Update current node using the child's constituent -void MyTree::UpdateNodePS( - DepTree& depTree, - const int nodeID, - const int childNodeID) -{ - int begin = depTree.vecDepNode.at(nodeID).constituent.first; - int end = depTree.vecDepNode.at(nodeID).constituent.second; - int childBeg = 
depTree.vecDepNode.at(childNodeID).constituent.first; - int childEnd = depTree.vecDepNode.at(childNodeID).constituent.second; - - pair pairPs; - pairPs.first = (begin < childBeg) ? begin : childBeg; - pairPs.second = (end > childEnd) ? end : childEnd; - depTree.vecDepNode.at(nodeID).constituent = pairPs; -} - -// Copy the Nodes position of depTree1 to depTree2 -void MyTree::CopyAllNodePS(const DepTree& depTree) -{ - for(int i = 0; i < m_depTree.nodeNum; i++) - { - m_depTree.vecDepNode.at(i).constituent = depTree.vecDepNode.at(i).constituent; - } -} - -// Initial the Dependency Tree, but the consituent position may -void MyTree::InitTree(const LTPData* ltpData) -{ - int index; - vector::const_iterator itParent; - vector::const_iterator itRelation; - - index = 0; - m_rootID = I_NULL_ID; - itParent = ltpData->vecParent.begin(); - itRelation = ltpData->vecRelation.begin(); - while(itParent != ltpData->vecParent.end()) - { - DepNode depNode; - - depNode.parent = *itParent; - depNode.relation = *itRelation; - depNode.id = index; - depNode.constituent.first = index; - depNode.constituent.second = index; - - m_depTree.vecDepNode.push_back(depNode); - //if relation is "HED", it is the root, else root is -1 - if(!depNode.relation.compare(S_ROOT_REL)) - { - m_rootID = index; - } - - ++ itParent; - ++ itRelation; - ++ index; - } - m_depTree.nodeNum = index; - - //get the children for every node - // size_t id = 0; - for(size_t id = 0; id = 0) //except the root node and punc nodes - { - m_depTree.vecDepNode.at(index).dequeChildren.push_back(id); - } - } -} - -// Update the consituent position for each depNode -bool MyTree::UpdateTree() -{ - if(m_rootID == I_NULL_ID) - { //if there isn`t verb in the sentence, do nothing - return 0; - } - - //a temp copy, used for update - DepTree updateTree = m_depTree; - int rootID = m_rootID; - - //iterate until the root's constituent is updated - while(!IsLeaf(updateTree, rootID)) - { - vector::iterator itDepNode; - int curIndex = 0; - - 
//check if the node is leaf, if yes update it's constituent and it's parent's - itDepNode = updateTree.vecDepNode.begin(); - while(itDepNode != updateTree.vecDepNode.end()) - { - deque::size_type childNum = (*itDepNode).dequeChildren.size(); //children number - - //scan the children, if leaf then update, else push_back - for(deque::size_type n = 0; n < childNum; n++) - { - int firstChildID = (*itDepNode).dequeChildren.front(); - (*itDepNode).dequeChildren.pop_front(); - - if(IsLeaf(updateTree,firstChildID)) - { //the node of id(depChildren[n]) in updateTree is leaf - UpdateNodePS(updateTree, curIndex, firstChildID); - } - else - { //push the child back - (*itDepNode).dequeChildren.push_back(firstChildID); - } - } - - ++ itDepNode; - ++ curIndex; //next node - } //interior while - - //for debug - //string strTemp; - - } //exterior while - - //update the m_depTree using the updateTree - CopyAllNodePS(updateTree); - - return 1; -} - -// Clear the Tree -void MyTree::ClearTree() -{ - m_depTree.vecDepNode.clear(); - m_depTree.nodeNum = 0; - m_rootID = I_NULL_ID; -} - -bool MyTree::IsLeaf(const DepTree& depTree, - int rootID) const -{ - DepNode depNode; - GetNodeValue(depNode, depTree, rootID); - - if(depNode.dequeChildren.empty()) - { - return 1; - } - else - { - return 0; - } -} - -// Check if node1 is parent of node2 -bool MyTree::IsParent(int parentID, - int childID) const -{ - assert((parentID < m_depTree.nodeNum) && (parentID >= 0)); - assert((childID < m_depTree.nodeNum) && (childID >= 0)); - - int newParentID = m_depTree.vecDepNode.at(childID).parent; - if (newParentID < 0) - { //root or punctuaion - return 0; - } - - if(parentID == newParentID) - { - return 1; - } - else - { - return 0; - } -} - -// Check if node1 is child of node2 -bool MyTree::IsChild(int childID, - int parentID) const -{ - assert((parentID < m_depTree.nodeNum) && (parentID >= 0)); - assert((childID < m_depTree.nodeNum) && (childID >= 0)); - - if(IsParent(parentID, childID)) - { - return 1; - } 
- else - { - return 0; - } -} - -// Check if node1 is sibling of node2 -bool MyTree::IsSibling(const int nodeID1, - const int nodeID2) const -{ - assert((nodeID1 < m_depTree.nodeNum) && (nodeID1 >= 0)); - assert((nodeID2 < m_depTree.nodeNum) && (nodeID2 >= 0)); - - deque dequeSibs; - deque::iterator itDequeSibs; - - GetAllSibs(nodeID2, dequeSibs); - - itDequeSibs = std::find(dequeSibs.begin(), dequeSibs.end(), nodeID1); - if(itDequeSibs != dequeSibs.end()) - { - return 1; - } - else - { - return 0; - } -} - -// check if node anceID is ancesstor of node postID -bool MyTree::IsAncestor(int anceID, - int postID) const -{ - assert((anceID < m_depTree.nodeNum) && (anceID >= 0)); - assert((postID < m_depTree.nodeNum) && (postID >= 0)); - - int parentID = m_depTree.vecDepNode.at(postID).parent; - - if (parentID < 0) - { //root or punctation node - return 0; - } - - while(!IsRoot(parentID)) - { - if(anceID == parentID) - { - return 1; - } - if(parentID < 0) break; // Added by Carl at 2009.09.29 - parentID = m_depTree.vecDepNode.at(parentID).parent; - } - - return 0; -} - -// check if node postID is posterity of node anceID -bool MyTree::IsPosterity(int postID, - int anceID) const -{ - assert((anceID < m_depTree.nodeNum) && (anceID >= 0)); - assert((postID < m_depTree.nodeNum) && (postID >= 0)); - - if(IsAncestor(anceID, postID)) - { - return 1; - } - else - { - return 0; - } -} - -void MyTree::GetNodeValue(DepNode& depNode, - const DepTree& depTree, - int nodeID) const -{ - assert((nodeID < m_depTree.nodeNum) && (nodeID >= 0)); - depNode = depTree.vecDepNode.at(nodeID); -} - - diff --git a/src/srl/MyTree.h b/src/srl/MyTree.h deleted file mode 100644 index 3c1ca6da1..000000000 --- a/src/srl/MyTree.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * File Name : MyTree.h - * Author : Frumes, hjliu - * - * Create Time : 20061231 - * Project Name NewSRLBaseLine - * - */ - -#ifndef _MY_TREE_ -#define _MY_TREE_ -#pragma warning(disable:4786) - -#include -#include "MyStruct.h" -#include 
"ConstVar.h" - -class MyTree -{ - public: - MyTree(const LTPData* ltpData); - ~MyTree(); - - int GetRootID() const; - void GetNodeValue(DepNode& depNode, int nodeID) const; - int GetLeftChild(int nodeID) const; - int GetRightChild(int nodeID) const; - int GetLeftSib(int nodeID) const; - int GetRightSib(int nodeID) const; - void GetAllSibs(int nodeID, deque& dequeSibs) const; - void GetAllNodePath(int intCurPdID, vector& vecPath) const; - void GetFamilyShip(string& strFSship, int nodeID1, int nodeID2) const; - int GetRCParent(int nodeID1, int nodeID2) const; - bool IsRoot(int nodeID) const; - bool IsLeaf(int nodeID) const; - - private: - // build and destroy the the tree - bool BuildDepTree(const LTPData* ltpData); - void InitTree(const LTPData* ltpData); - bool UpdateTree(); - void ClearTree(); - - // the family members relationship - bool IsParent(int parentID, int childID) const; - bool IsChild(int childID, int parentID) const; - bool IsSibling(int nodeID1, int nodeID2) const; - bool IsAncestor(int anceID, int postID) const; - bool IsPosterity(int postID, int anceID) const; - - - // other operation - void GetNodeValue( - DepNode& depNode, - const DepTree& depTree, - int nodeID) const; - bool IsLeaf( - const DepTree& depTree, - int rootID) const; - void UpdateNodePS( - DepTree& depTree, - int nodeID, - int childNodeID); - void CopyAllNodePS(const DepTree& depTree); - - public: - DepTree m_depTree; - - private: - int m_rootID; -}; - -#endif - diff --git a/src/srl/Pi/CMakeLists.txt b/src/srl/Pi/CMakeLists.txt new file mode 100644 index 000000000..38a959652 --- /dev/null +++ b/src/srl/Pi/CMakeLists.txt @@ -0,0 +1,14 @@ +SET(SrlPiSRC + config/SrlPiConfig.h + model/SrlPiModel.h + ) + +add_executable(srl_pi_train train.cpp process/TrainSrlPi.cpp ${SrlPiSRC}) +target_link_libraries(srl_pi_train dynet ${LIBS}) + +set_target_properties (srl_pi_train PROPERTIES + OUTPUT_NAME srl_pi_train + RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/) + +add_executable(Pipred pred.cpp 
process/PredSrlPi.cpp ${SrlPiSRC}) +target_link_libraries(Pipred dynet ${LIBS}) diff --git a/src/srl/Pi/config/SrlPiConfig.h b/src/srl/Pi/config/SrlPiConfig.h new file mode 100644 index 000000000..0e098697e --- /dev/null +++ b/src/srl/Pi/config/SrlPiConfig.h @@ -0,0 +1,67 @@ +// +// Created by liu on 2017-05-12. +// + +#ifndef Srl_Pi_CONFIG_H +#define Srl_Pi_CONFIG_H + +#include "config/ModelConf.h" +#include "boost/serialization/access.hpp" + +class SrlPiBaseConfig : public virtual ModelConf { +public: + unsigned word_dim; + unsigned emb_dim; + unsigned pos_dim; + unsigned rel_dim; + unsigned lstm_input_dim; + unsigned lstm_hidden_dim; + unsigned layers; + + string embedding; + + SrlPiBaseConfig(string confName = "Configuration"): ModelConf(confName) { + registerConf("word_dim" , UNSIGNED, word_dim , "word dimension" , 100); + registerConf("emb_dim" , UNSIGNED, emb_dim , "embedding dimension" , 50); + registerConf("pos_dim" , UNSIGNED, pos_dim , "postag dimension" , 12); + registerConf("rel_dim" , UNSIGNED, rel_dim , "relation dim" , 50); + registerConf("lstm_input_dim" , UNSIGNED, lstm_input_dim , "lstm_input_dim" , 100); + registerConf("lstm_hidden_dim", UNSIGNED, lstm_hidden_dim , "lstm_hidden_dim" , 100); + registerConf("layers" , UNSIGNED, layers , "lstm layers" , 1); + + registerConf ("embedding" , STRING, embedding , "embedding", ""); + } + + friend class boost::serialization::access; + template + void serialize(Archive &ar, const unsigned int) { + ar & word_dim; + ar & emb_dim; + ar & pos_dim; + ar & rel_dim; + ar & lstm_input_dim; + ar & lstm_hidden_dim; + ar & layers; + } +}; + +class SrlPiTrainConfig : public virtual SrlPiBaseConfig, public virtual LabelModelTrainerConf { +public: + + SrlPiTrainConfig(string confName = "Configuration"): + SrlPiBaseConfig(confName), + LabelModelTrainerConf(confName) + { + + } +}; + +class SrlPiPredConfig : public virtual SrlPiBaseConfig, public virtual LabelModelPredictorConf { +public: + SrlPiPredConfig(string confName 
= "Configuration"): + SrlPiBaseConfig(confName), + LabelModelPredictorConf(confName) + { } +}; + +#endif //Srl_Pi_CONFIG_H diff --git a/src/srl/Pi/model/SrlPiModel.h b/src/srl/Pi/model/SrlPiModel.h new file mode 100644 index 000000000..c1a5cc489 --- /dev/null +++ b/src/srl/Pi/model/SrlPiModel.h @@ -0,0 +1,123 @@ +// +// Created by liu on 2017-05-12. +// + +#ifndef PROJECT_PIMODEL_H +#define PROJECT_PIMODEL_H + +#include +#include +#include "../config/SrlPiConfig.h" +#include "structure/SrlPiSample.h" + +// model builders +#include +#include +#include +#include +#include "structure/WordEmbBuilder.h" + + +class PiModel : public PiSrlModel { + SrlPiBaseConfig & config; + base::Debug debug; + // todo define ModelBuilders + WordEmbBuilder emb_lookup; + LookupModelBuilder word_lookup, pos_lookup, rel_lookup; + BiLSTMModelBuilder lstm; + AffineTransformModelBuilder sentTransform, resultTransform; + + +public: + PiModel(SrlPiBaseConfig &config) : + PiSrlModel(config), + config(config), debug("PiModel") { } + + void initEmbedding(unordered_map > & emb) { + if (config.emb_dim) + emb_lookup.setEmb(emb); + } + void initEmbedding() { + if (config.emb_dim) + emb_lookup.loadEmb(config.embedding); + } + + void init() { + vector sentDims; + if (config.word_dim) { + word_lookup = LookupModelBuilder(dict[WORD].size(), config.word_dim); word_lookup.init(model); + sentDims.push_back(config.word_dim); + } + if (config.emb_dim) { + sentDims.push_back(config.emb_dim); + } + if (config.pos_dim) { + pos_lookup = LookupModelBuilder(dict[POS].size(), config.pos_dim); pos_lookup.init(model); + sentDims.push_back(config.pos_dim); + } + if (config.rel_dim) { + rel_lookup = LookupModelBuilder(dict[REL].size(), config.rel_dim); rel_lookup.init(model); + sentDims.push_back(config.rel_dim); + } + sentTransform = AffineTransformModelBuilder(sentDims, config.lstm_input_dim); sentTransform.init(model); + lstm = BiLSTMModelBuilder(config.layers, config.lstm_input_dim, config.lstm_hidden_dim); 
lstm.init(model); + resultTransform = AffineTransformModelBuilder({config.lstm_hidden_dim}, 2); resultTransform.init(model); + + } + + virtual vector label(ComputationGraph &hg, SrlPiSample &samples) { + vector sents; + for (int j = 0; j < samples.size(); ++j) { + vector wordFeature; + if (config.word_dim) { + wordFeature.push_back(word_lookup.forward(hg, (unsigned) dict[WORD].convert(samples.getWord(j).getWord()))); + } + if (config.emb_dim) { + wordFeature.push_back(dynet::expr::input(hg, {config.emb_dim}, emb_lookup.getEmb(samples.getWord(j).getWord()))); + } + if (config.pos_dim) { + wordFeature.push_back(pos_lookup.forward(hg, (unsigned) dict[POS].convert(samples.getWord(j).getPos()))); + } + if (config.rel_dim) { + wordFeature.push_back(rel_lookup.forward(hg, (unsigned) dict[REL].convert(samples.getWord(j).getRel()))); + } + sents.push_back(sentTransform.forward(hg, wordFeature)); + } + lstm.newGraph(hg); + lstm.startNewSequence(); + vector lstm_out = lstm.forward(hg, sents); + for (int k = 0; k < lstm_out.size(); ++k) { + lstm_out[k] = softmax(resultTransform.forward(hg, {activate(lstm_out[k])})); + } + return lstm_out; + } + + virtual Expression + ExtractError(ComputationGraph &hg, vector &adists, SrlPiSample &samples, Performance &perf) { + assert(adists.size() == samples.size()); + vector err; + for (int j = 0; j < adists.size(); ++j) { + vector ans = as_vector(hg.incremental_forward(adists[j])); + int is_pred = (int) (samples.getWord(j).getPredicate() == PRED_LABEL); + setPerf(perf, is_pred, ans); + err.push_back(pick(log(adists[j]), (unsigned) is_pred)); + } + return -sum(err); + } + + virtual void ExtractResults(ComputationGraph &hg, vector &adists, SrlPiSample &samples) { + assert(adists.size() == samples.size()); + for (int j = 0; j < adists.size(); ++j) { + int god = getMaxId(as_vector(hg.incremental_forward(adists[j]))); + samples.getWord(j).setPredicate((bool) god); + } + unsigned long pred_size = samples.getPredicateList().size(); + for (int j = 
0; j < adists.size(); ++j) { + samples.getWord(j).getArgs().resize(pred_size, NIL_LABEL); + } + } + +}; + + +#endif //PROJECT_PIMODEL_H diff --git a/src/srl/Pi/pred.cpp b/src/srl/Pi/pred.cpp new file mode 100644 index 000000000..67455b0a0 --- /dev/null +++ b/src/srl/Pi/pred.cpp @@ -0,0 +1,14 @@ +// +// Created by liu on 2017/4/7. +// + +#include "dynet/dynet.h" +#include "base/processLoader.h" +#include "process/PredSrlPi.h" + +using namespace std; + +int main(int argc, char * argv[]) { + base::ProcessLoader processLoader(argc, argv); + return processLoader.runProcess(); +} diff --git a/src/srl/Pi/process/PredSrlPi.cpp b/src/srl/Pi/process/PredSrlPi.cpp new file mode 100644 index 000000000..ff9e0bbfc --- /dev/null +++ b/src/srl/Pi/process/PredSrlPi.cpp @@ -0,0 +1,5 @@ +// +// Created by liu on 2017-05-12. +// + +#include "PredSrlPi.h" diff --git a/src/srl/Pi/process/PredSrlPi.h b/src/srl/Pi/process/PredSrlPi.h new file mode 100644 index 000000000..65add9b29 --- /dev/null +++ b/src/srl/Pi/process/PredSrlPi.h @@ -0,0 +1,53 @@ +// +// Created by liu on 2017-05-12. 
+// + +#ifndef PROJECT_PREDSTNLSTM_H +#define PROJECT_PREDSTNLSTM_H + +#include +#include +#include "../config/SrlPiConfig.h" +#include "structure/SrlPiSample.h" +#include "../model/SrlPiModel.h" +#include "extractor/ConverterDataToSrlPiSample.h" + +using namespace std; + +class PredSrlPi : public LabelModelSGDSeqPredictor{ + SrlPiPredConfig & config; + PiModel model; + extractor::ConverterMultiLineFileReader fileReader; + ConverterDataToSrlPiSample conv_toSample; +public: + PredSrlPi(SrlPiPredConfig &config) + : LabelModelSGDSeqPredictor(config, model), + config(config), model(config) {} + + virtual void init() { + initSample(testSamples, config.test_data); + model.loadDict(); // load dict + model.init(); // init parameters + model.load(); // load model + model.initEmbedding(); + } + + virtual void extractResult() { + conv_toSample.iconv(&testSamples); + fileReader.reWriteFile(config.output); + } + +private: + + void initSample(vector& samples, string file) { + vector fileName = {file}; + fileReader.init(fileName); + fileReader.run(); + conv_toSample.init(fileReader.getResult()); + conv_toSample.run(); + samples = conv_toSample.getResult(); + } +}; + + +#endif //PROJECT_PREDSTNLSTM_H diff --git a/src/srl/Pi/process/TrainSrlPi.cpp b/src/srl/Pi/process/TrainSrlPi.cpp new file mode 100644 index 000000000..ad152d462 --- /dev/null +++ b/src/srl/Pi/process/TrainSrlPi.cpp @@ -0,0 +1,5 @@ +// +// Created by liu on 2017-05-12. +// + +#include "TrainSrlPi.h" diff --git a/src/srl/Pi/process/TrainSrlPi.h b/src/srl/Pi/process/TrainSrlPi.h new file mode 100644 index 000000000..9e2922998 --- /dev/null +++ b/src/srl/Pi/process/TrainSrlPi.h @@ -0,0 +1,53 @@ +// +// Created by liu on 2017-05-12. 
+// + +#ifndef PROJECT_TRAINSTNLSTM_H +#define PROJECT_TRAINSTNLSTM_H + +#include "process/LabelModelSGDSeqTrainer.h" +#include "../config/SrlPiConfig.h" +#include "structure/SrlPiSample.h" +#include "../model/SrlPiModel.h" +#include "extractor/ConverterMultiLineFileReader.h" +#include "extractor/ConverterDataToSrlPiSample.h" +#include "extractor/ExtractorFileToWordEmb.h" + + +class TrainSrlPi : public LabelModelSGDSeqTrainer { + SrlPiTrainConfig &config; + PiModel model; +public: + TrainSrlPi(SrlPiTrainConfig &config) + : LabelModelSGDSeqTrainer(config, model), + config(config), model(config) + {} + + void init() { + initSample(trainSamples, config.training_data); + initSample(devSamples, config.dev_data); + + model.registerDict(trainSamples); + + model.init(); // init model size + model.load(); + model.initEmbedding(); + + } + +private: + void initSample(vector & samples, string file) { + vector fileName = {file}; + extractor::ConverterMultiLineFileReader fileReader; + fileReader.init(fileName); + fileReader.run(); + + ConverterDataToSrlPiSample conv_toSample; + conv_toSample.init(fileReader.getResult()); + conv_toSample.run(); + samples = conv_toSample.getResult(); + } +}; + + +#endif //PROJECT_TRAINSTNLSTM_H diff --git a/src/srl/Pi/train.cpp b/src/srl/Pi/train.cpp new file mode 100644 index 000000000..7df6441a6 --- /dev/null +++ b/src/srl/Pi/train.cpp @@ -0,0 +1,14 @@ +// +// Created by liu on 2017/4/7. 
+// + +#include "dynet/dynet.h" +#include "base/processLoader.h" +#include "process/TrainSrlPi.h" + +using namespace std; + +int main(int argc, char * argv[]) { + base::ProcessLoader processLoader(argc, argv); + return processLoader.runProcess(); +} diff --git a/src/srl/SRLBaseline.cpp b/src/srl/SRLBaseline.cpp deleted file mode 100644 index 05359f95d..000000000 --- a/src/srl/SRLBaseline.cpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * File Name : SRLBaseline.h - * Author : Frumes - * - * Updated by : jiangfeng - * Update Time : 2013-8-21 - * - */ - -#include "SRLBaseline.h" - -SRLBaseline::SRLBaseline(string configXml, string selectFeats) - : m_dataPreProc(NULL), - m_featureExtractor(NULL), - m_featureCollection(NULL) -{ -} - -SRLBaseline::~SRLBaseline() -{ - if (m_dataPreProc) { delete m_dataPreProc; } - if (m_featureCollection) { delete m_featureCollection; } - if (m_featureExtractor) { delete m_featureExtractor; } -} - -// Check if the node will be filtered: only when the node -// is predicate and punctation -inline bool SRLBaseline::IsFilter(int nodeID, int intCurPd) const -{ - DepNode depNode; - m_dataPreProc->m_myTree->GetNodeValue(depNode, nodeID); - - //the punctuation nodes, current predicate node - //changed for PTBtoDep, only filter the current predicate - if(nodeID == intCurPd) - { - return 1; - } - else - { - return 0; - } - - //return 0; -} - - -void SRLBaseline::SetPredicate(const vector& vecPred) -{ - m_vecPredicate = vecPred; -} - -void SRLBaseline::setDataPreProc(const DataPreProcess* dataPreProc) -{ - m_dataPreProc = dataPreProc; -} - -bool SRLBaseline::isVerbPOS(const string& POS) const -{ - return m_configuration.is_verbPOS(POS); -} - diff --git a/src/srl/SRLBaseline.h b/src/srl/SRLBaseline.h deleted file mode 100644 index ccde96024..000000000 --- a/src/srl/SRLBaseline.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * File Name : SRLBaseline.h - * Author : Frumes - * - * Updated by : jiangfeng - * Update Time : 2013-8-21 - * - */ - -#ifndef _SRL_BASELINE_ 
-#define _SRL_BASELINE_ -#pragma warning(disable:4786) - -#include -#include "DataPreProcess.h" -#include "Configuration.h" -#include "FeatureExtractor.h" - -using namespace std; - -class SRLBaseline -{ - public: - SRLBaseline(string configXml, string selectFeats); - ~SRLBaseline(); - - public: - void setDataPreProc(const DataPreProcess* dataPreProc); - void SetPredicate(const vector& vecPred); - bool isVerbPOS(const string& POS) const; - - protected: - bool IsFilter(int nodeID, int intCurPd) const; - - protected: - const DataPreProcess *m_dataPreProc; - Configuration m_configuration; - FeatureExtractor *m_featureExtractor; - FeatureCollection *m_featureCollection; - vector m_prgFeatureNumbers; - vector m_srlFeatureNumbers; - vector m_prgFeaturePrefixes; - vector m_srlFeaturePrefixes; - vector m_vecPredicate; - vector< vector > m_srlSelectFeatures; -}; - -#endif - diff --git a/src/srl/SRLBaselineExt.cpp b/src/srl/SRLBaselineExt.cpp deleted file mode 100644 index 4b1d1fb9f..000000000 --- a/src/srl/SRLBaselineExt.cpp +++ /dev/null @@ -1,352 +0,0 @@ -/* - * File Name : SRLBaselineExt.cpp - * Author : msmouse - * - * Updated by : jiangfeng - * Update Time : 2013-8-21 - * - */ - -#include "SRLBaselineExt.h" -#include "Configuration.h" -#include "FeatureExtractor.h" -#include - -SRLBaselineExt::SRLBaselineExt(string configXml, string selectFeats) - :SRLBaseline(configXml, selectFeats) -{ - m_configuration.load_xml(configXml); - m_featureExtractor = new FeatureExtractor(m_configuration); - m_featureCollection = new FeatureCollection(); - - m_srlFeatureNumbers.clear(); - m_srlFeaturePrefixes.clear(); - m_prgFeatureNumbers.clear(); - m_prgFeaturePrefixes.clear(); - - get_feature_config(); - open_select_config(selectFeats); -} - -SRLBaselineExt::~SRLBaselineExt() -{ -} - -void SRLBaselineExt::ExtractPrgFeatures(vector< vector >& vecPrgFeatures) const -{ - vecPrgFeatures.clear(); - - Sentence sentence; - - vector vecRows; - convert2ConllFormat(vecRows); - - 
sentence.from_corpus_block(vecRows); - const size_t row_count = sentence.get_row_count(); - - m_featureExtractor->set_target_sentence(sentence); - - m_featureExtractor->calc_node_features(); - - vector< vector > vec_feature_values; - for (size_t i = 0; i < m_prgFeatureNumbers.size(); ++i) - { - vector feature_values; - - const int feature_number = m_prgFeatureNumbers[i]; - const string& feature_prefix = m_prgFeaturePrefixes[i]; - bool feature_empty_flag = false; - try { - m_featureExtractor->get_feature_for_rows(feature_number, feature_values); - } catch (...) { - feature_empty_flag = true; - } - - if (feature_empty_flag) - { - feature_values.clear(); - for (size_t row = 0; row <= row_count; ++row) - { - feature_values.push_back(""); - } - } - - vec_feature_values.push_back(feature_values); - } - - for (size_t row = 1; row <= row_count; ++row) - { - vector instance; - for (size_t i = 0; i < m_prgFeatureNumbers.size(); ++i) - { - string feature = m_prgFeaturePrefixes[i] + "@" - + vec_feature_values[i][row]; - instance.push_back(feature); - } - vecPrgFeatures.push_back(instance); - } -} - -void SRLBaselineExt::ExtractSrlFeatures( - VecFeatForSent& vecAllFeatures, - VecPosForSent& vecAllPos) const -{ - vecAllFeatures.clear(); - vecAllPos.clear(); - - Sentence sentence; - - map feat_number_index; - feat_number_index.clear(); - - for (size_t k = 0; k < m_srlFeatureNumbers.size(); ++k) - { - feat_number_index[m_srlFeatureNumbers[k]] = k; - } - - vector vecRows; - convert2ConllFormat(vecRows); - - sentence.from_corpus_block(vecRows); - const size_t predicate_count = sentence.get_predicates().size(); - const size_t row_count = sentence.get_row_count(); - - //feature_extractor.set_target_sentence(sentence); - m_featureExtractor->set_target_sentence(sentence); - vector feature_values; - vector< vector > all_feature_values; - - // loop for each predicate - for (size_t predicate_index = 0; predicate_index < predicate_count; ++predicate_index) - { - VecFeatForVerb 
vecFeatAllCons; - VecFeatForCons vecForCons; - VecPosForVerb vecPosVerb; - - int predID = m_vecPredicate[predicate_index]; - all_feature_values.clear(); - - // calculate features - //feature_extractor.calc_features(predicate_index); - m_featureExtractor->calc_features(predicate_index); - - // loop for each feature - for (size_t i = 0; i < m_srlFeatureNumbers.size(); ++i) - { - const int feature_number = m_srlFeatureNumbers[i]; - const string& feature_prefix = m_srlFeaturePrefixes[i]; - bool feature_empty_flag = false; - try - { - m_featureExtractor->get_feature_for_rows(feature_number, feature_values); - } - catch (...) - { - feature_empty_flag = true; - } - - if (feature_empty_flag) - { - feature_values.clear(); - // loop for each row - for (size_t row = 1; row <= row_count; ++row) - { - feature_values.push_back(""); - } - } - - all_feature_values.push_back(feature_values); - } - - for (size_t row = 1; row <= row_count; ++row) - { - vecForCons.clear(); - if (IsFilter(row-1, predID)) - continue; - for (size_t i = 0; i < m_srlSelectFeatures.size(); ++i) - { - string select_feature; - select_feature.clear(); - for (size_t j = 0; j < m_srlSelectFeatures[i].size(); ++j) - { - string feat_name = m_srlSelectFeatures[i][j]; - int feat_number = m_featureCollection->get_feature_number(feat_name); - int value_index = feat_number_index[feat_number]; - if (j == m_srlSelectFeatures[i].size()-1) - select_feature += m_srlFeaturePrefixes[value_index] + "@" + all_feature_values[value_index][row]; - else - select_feature += m_srlFeaturePrefixes[value_index] + "@" + all_feature_values[value_index][row] + "+"; - } - vecForCons.push_back(select_feature); - } - vecFeatAllCons.push_back(vecForCons); - } - - vecAllFeatures.push_back(vecFeatAllCons); - - for (int nodeID = 0; nodeID < m_dataPreProc->m_intItemNum; nodeID++) - { - int predID = m_vecPredicate[predicate_index]; - if (!IsFilter(nodeID, predID)) - { - //get position of unFiltered nodes, and push_back to vecPosVerb - DepNode 
curNode; - m_dataPreProc->m_myTree->GetNodeValue(curNode, nodeID); - vecPosVerb.push_back(curNode.constituent); - } - } - vecAllPos.push_back(vecPosVerb); - } -} - -void SRLBaselineExt::convert2ConllFormat(vector& vecRows) const -{ - size_t row_count = m_dataPreProc->m_ltpData->vecWord.size(); - size_t predicate_count = m_vecPredicate.size(); - - for (size_t id = 1; id <= row_count; ++id) - { - ostringstream row; - row.str(""); - /*construct a line with element: word, pos, relation, .etc*/ - row << id; // first column: id - row << " " << m_dataPreProc->m_ltpData->vecWord[id-1]; // second column: form - row << " " << m_dataPreProc->m_ltpData->vecWord[id-1]; // third column: lemma, same with form - row << " " << m_dataPreProc->m_ltpData->vecWord[id-1]; // forth column: plemma, same with lemma - row << " " << m_dataPreProc->m_ltpData->vecPos[id-1]; // fifth column: pos - row << " " << m_dataPreProc->m_ltpData->vecPos[id-1]; // sixth column: ppos, same with ppos - row << " " << "_"; // 7th column: feat: null - row << " " << "_"; // 8th column: pfeat: null - - if (m_dataPreProc->m_ltpData->vecParent[id-1] == -2) - { - row << " " << 0; - row << " " << 0; - } - else - { - row << " " << m_dataPreProc->m_ltpData->vecParent[id-1] + 1; - row << " " << m_dataPreProc->m_ltpData->vecParent[id-1] + 1; - } - - row << " " << m_dataPreProc->m_ltpData->vecRelation[id-1]; //deprel - row << " " << m_dataPreProc->m_ltpData->vecRelation[id-1]; //pdeprel - - if (count(m_vecPredicate.begin(), m_vecPredicate.end(), id - 1) != 0) // fillpred - { - row << " " << "Y"; - row << " " << "Y"; - } - else - { - row << " " << "_"; - row << " " << "_"; - } - - for (size_t args = 0; args < predicate_count; ++args) // make room for args - row << " " << "_"; - - /*finish construct a line*/ - vecRows.push_back(row.str()); - } -} - -void SRLBaselineExt::get_feature_config() -{ - /* feature set for role labeling */ - const vector& argu_feat_set = m_configuration.get_argu_config().get_feature_names(); - - /* 
feature set for predicate recognization */ - const vector& prg_feat_set = m_configuration.get_pred_recog_config().get_feature_names(); - - m_srlFeatureNumbers.clear(); - m_srlFeaturePrefixes.clear(); - for (size_t i=0; iget_feature_number(feature_name); - const string& feature_prefix - = m_featureCollection->get_feature_prefix(feature_number); - - if ( (find(m_srlFeatureNumbers.begin(), - m_srlFeatureNumbers.end(), - feature_number)) - == m_srlFeatureNumbers.end()) // not find - { - m_srlFeatureNumbers.push_back(feature_number); - m_srlFeaturePrefixes.push_back(feature_prefix); - } - } - - m_prgFeatureNumbers.clear(); - m_prgFeaturePrefixes.clear(); - for (size_t i=0; iget_feature_number(feature_name); - const string& feature_prefix - = m_featureCollection->get_feature_prefix(feature_number); - - if ( (find(m_prgFeatureNumbers.begin(), - m_prgFeatureNumbers.end(), - feature_number)) == m_prgFeatureNumbers.end()) // not find - { - m_prgFeatureNumbers.push_back(feature_number); - m_prgFeaturePrefixes.push_back(feature_prefix); - } - } -} - -void SRLBaselineExt::open_select_config(string selectConfig) -{ - ifstream conf_input(selectConfig.c_str()); - if (!conf_input) - { - throw runtime_error("select_config file cannot open!"); - } - m_srlSelectFeatures.clear(); - string line; - while (getline(conf_input, line)) - { - if ("" != line) - { - if ('#' == line[0]) - { - continue; - } - vector vec_str; - replace(line.begin(), line.end(), '+', ' '); - istringstream istr(line); - string temp_str; - while (istr >> temp_str) - { - vec_str.push_back(temp_str); - } - m_srlSelectFeatures.push_back(vec_str); - } - } - conf_input.close(); -} - -bool SRLBaselineExt::IsFilter(int nodeID, int intCurPd) const -{ - DepNode depNode; - m_dataPreProc->m_myTree->GetNodeValue(depNode, nodeID); - - //the punctuation nodes, current predicate node - //changed for PTBtoDep, only filter the current predicate - if( (nodeID == intCurPd) || - (depNode.parent < 0) || - ( (depNode.constituent.first <= 
intCurPd) && - (depNode.constituent.second >= intCurPd) ) ) - { - return 1; - } - else - { - return 0; - } -} - diff --git a/src/srl/SRLBaselineExt.h b/src/srl/SRLBaselineExt.h deleted file mode 100644 index 1bce4214b..000000000 --- a/src/srl/SRLBaselineExt.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * File Name : SRLBaselineExt.h - * Author : msmouse - * - * Updated by : jiangfeng - * Update Time : 2013-8-21 - * - */ - -#ifndef __SRL_BASELINE_EXT__ -#define __SRL_BASELINE_EXT__ - -#include "SRLBaseline.h" -#include "Configuration.h" -#include "FeatureExtractor.h" - -class SRLBaselineExt : public SRLBaseline -{ -public: - SRLBaselineExt(string configXml, string selectFeats); - ~SRLBaselineExt(); - -public: - void ExtractSrlFeatures( - VecFeatForSent& vecAllFeatures, - VecPosForSent& vecAllPos - ) const; - - void ExtractPrgFeatures( - vector< vector >& vecPrgFeatures - ) const; - - void convert2ConllFormat( - vector& vecRows - ) const; - - void get_feature_config(); - - void open_select_config(string selectConfig); - -protected: - bool IsFilter(int nodeID, int intCurPd) const; - -}; - -#endif - diff --git a/src/srl/SRL_DLL.cpp b/src/srl/SRL_DLL.cpp index 02cb90e4f..372821108 100644 --- a/src/srl/SRL_DLL.cpp +++ b/src/srl/SRL_DLL.cpp @@ -9,91 +9,55 @@ using namespace std; static DepSRL g_depSRL; +// helper functions +int isLegalInput(const vector &words, const vector &POSs, const vector< pair > &parse); + + // Load Resources -int SRL_LoadResource(const string &ConfigDir) +int srl_load_resource(const string &modelFile) { - if (0 == g_depSRL.LoadResource(ConfigDir)) return -1; - return 0; + return g_depSRL.LoadResource(modelFile); } // Release Resources -int SRL_ReleaseResource() +int srl_release_resource() { - if (0 == g_depSRL.ReleaseResource()) return -1; - return 0; + return g_depSRL.ReleaseResource(); } -// perform SRL -int DoSRL( +/** + * + */ +int srl_dosrl( const vector &words, const vector &POSs, - const vector &NEs, - const vector< pair > &parse, - vector< pair< 
int, vector< pair > > > > &tmp_vecSRLResult) + const vector > &parse, + vector > > > > &vecSRLResult +) { + vecSRLResult.clear(); + if (!isLegalInput(words, POSs, parse)) return -1; + return g_depSRL.GetSRLResult(words, POSs, parse, vecSRLResult); +} + + +// helper functions + + +int isLegalInput(const vector &words, const vector &POSs, const vector< pair > &parse) { - if (words.size() != POSs.size() - || words.size() != parse.size() - || words.size() != NEs.size()) { - return -1; + if (words.size() != POSs.size() || words.size() != parse.size()) { + return false; } int len = words.size(); for (int i = 0; i < len; ++ i) { - if (words[i].empty() || POSs[i].empty() || NEs.empty()) { - return -1; + if (words[i].empty() || POSs[i].empty()) { + return false; } int father = parse[i].first; if (father < -1 || father >= len || parse[i].second.empty()) { - return -1; + return false; } } - - tmp_vecSRLResult.clear(); - - if (0 == g_depSRL.GetSRLResult(words, POSs, NEs, parse, tmp_vecSRLResult)) { - return -1; - } - - return tmp_vecSRLResult.size(); + return true; } - -int GetSRLResult_size( - vector< pair< int, vector< pair > > > > &vecSRLResult, - vector< pair< int, vector< pair > > > > &tmp_vecSRLResult) -{ - if (vecSRLResult.size() != tmp_vecSRLResult.size()) { - cerr << "vecSRLResult size != tmp_vecSRLResult size" << endl; - return -1; - } - int i = 0; - for (; i < vecSRLResult.size(); ++i) { - vecSRLResult[i].first = tmp_vecSRLResult[i].second.size(); - } - return 0; -} - -int GetSRLResult( - vector< pair< int, vector< pair > > > > &vecSRLResult, - vector< pair< int, vector< pair > > > > &tmp_vecSRLResult) -{ - if (vecSRLResult.size() != tmp_vecSRLResult.size()) { - cerr << "vecSRLResult size != tmp_vecSRLResult size" << endl; - return -1; - } - int i = 0; - for (; i < vecSRLResult.size(); ++i) { - if (vecSRLResult[i].second.size() != tmp_vecSRLResult[i].second.size()) { - cerr << "vecSRLResult[i].second.size() != tmp_vecSRLResult[i].second.size()" << endl - << "i = " 
<< i << endl; - } - vecSRLResult[i].first = tmp_vecSRLResult[i].first; - int j = 0; - for (; j < tmp_vecSRLResult[i].second.size(); ++j) { - vecSRLResult[i].second[j].first = tmp_vecSRLResult[i].second[j].first.c_str(); - vecSRLResult[i].second[j].second.first = tmp_vecSRLResult[i].second[j].second.first; - vecSRLResult[i].second[j].second.second = tmp_vecSRLResult[i].second[j].second.second; - } - } - return 0; -} - diff --git a/src/srl/SRL_DLL.h b/src/srl/SRL_DLL.h index 64ed10d84..77d9d3457 100644 --- a/src/srl/SRL_DLL.h +++ b/src/srl/SRL_DLL.h @@ -21,38 +21,26 @@ using namespace std; #pragma comment(lib, "srl.lib") #endif #endif - -SRL_DLL_API int SRL( +/** + * + * @param words + * @param POSs + * @param parse + * @param vecSRLResult 谓词数组[<谓词序号, 论元数组[<论元label, <位置开始,位置结束>>]>] 序号从0开始 + * @return 执行情况 正常返回0 异常返回-1 + */ +SRL_DLL_API int srl_dosrl( const vector &words, const vector &POSs, - const vector &NEs, - const vector< pair > &parse, - vector< pair< int, vector< pair > > > > &vecSRLResult + const vector > &parse, + vector > > > > &vecSRLResult ); // Load Resources -SRL_DLL_API int SRL_LoadResource(const string &ConfigDir); +SRL_DLL_API int srl_load_resource(const string &modelFile); // Release Resources -SRL_DLL_API int SRL_ReleaseResource(); - -// Perform SRL -SRL_DLL_API int DoSRL( - const vector &words, - const vector &POSs, - const vector &NEs, - const vector< pair > &parse, - vector< pair< int, vector< pair > > > > &tmp_vecSRLResult -); - -SRL_DLL_API int GetSRLResult_size( - vector< pair< int, vector< pair > > > > &vecSRLResult, - vector< pair< int, vector< pair > > > > &tmp_vecSRLResult); - - -SRL_DLL_API int GetSRLResult( - vector< pair< int, vector< pair > > > > &vecSRLResult, - vector< pair< int, vector< pair > > > > &tmp_vecSRLResult); +SRL_DLL_API int srl_release_resource(); #endif diff --git a/src/srl/SRL_DLL_x.cpp b/src/srl/SRL_DLL_x.cpp deleted file mode 100644 index 6f50debc3..000000000 --- a/src/srl/SRL_DLL_x.cpp +++ /dev/null @@ -1,23 +0,0 
@@ -#include "SRL_DLL.h" - -int SRL( - const vector &words, - const vector &POSs, - const vector &NEs, - const vector< pair > &parse, - vector< pair< int, vector< pair > > > > &vecSRLResult - ) -{ - vecSRLResult.clear(); - vector< pair< int, vector< pair > > > > tmp_vecSRLResult; - int resultNum = DoSRL(words, POSs, NEs, parse,tmp_vecSRLResult); - if (resultNum < 0) return -1; - if (resultNum == 0) return 0; - vecSRLResult.resize(resultNum); - if (0 != GetSRLResult_size(vecSRLResult,tmp_vecSRLResult)) return -1; - int i = 0; - for (; i < resultNum; ++i) { - vecSRLResult[i].second.resize( vecSRLResult[i].first ); - } - return GetSRLResult(vecSRLResult,tmp_vecSRLResult); -} diff --git a/src/srl/Sentence.cpp b/src/srl/Sentence.cpp deleted file mode 100644 index 2b00edfb0..000000000 --- a/src/srl/Sentence.cpp +++ /dev/null @@ -1,245 +0,0 @@ -/* - * File Name : Sentence.cpp - * Author : msmouse - * Create Time : 2006-12-31 - * Project Name : NewSRLBaseLine - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - */ - - -#include "Sentence.h" -#include -#include -#include "boost/lexical_cast.hpp" - - -#define _DEBUG_ -#ifdef _DEBUG_ -#include -#endif - -using namespace std; - -void Sentence::from_corpus_block( - const std::vector &corpus_block) - // const Configuration& config) -{ - size_t row_count = corpus_block.size(); - - // make room for data storage - resize_(row_count); - - vector > children_of_node(row_count+1); - - // loop for each line - for (size_t row=1; row <= row_count; ++row) - { - istringstream line_stream(corpus_block[row-1]); // row ID starts at 1 - - size_t ID; - line_stream>>ID; - assert(row == ID); - - // get other fields; - for (size_t field = FIELD_FORM; field < FIELD_NUMBER; ++field) - { - line_stream>>m_fields[row][field]; - } - - // get arguments - size_t predicate_number = 0; - string argument; - while (line_stream>>argument) - { - ++predicate_number; - - if (predicate_number > m_argument_columns.size()) - { - 
m_argument_columns.resize(predicate_number); - m_argument_columns[predicate_number-1].push_back(string()); - // row starts at 1 - } - - if ("_" == argument) - { - m_argument_columns[predicate_number-1].push_back(string()); - } - else - { - m_argument_columns[predicate_number-1].push_back(argument); - } - } - - // predicate - if ("Y" == m_fields[row][FIELD_FILLPRED]) - { - // m_predicates.push_back(Predicate(row, get_predicate_type_try_hard_(config,row))); - m_predicates.push_back(Predicate(row)); - } - - // parent and child relationship - size_t parent = boost::lexical_cast(m_fields[row][FIELD_HEAD]); - m_HEADs.push_back(parent); - children_of_node[parent].push_back(row); - } - - if (m_predicates.size() != m_argument_columns.size()) - { - m_argument_columns.resize(m_predicates.size()); //proinsight - // cout< node_queue; - node_queue.push(0); - - while (!node_queue.empty()) - { - size_t node = node_queue.front(); - node_queue.pop(); - node_iter = m_node_of_row[node]; - - for (int i = 0; i < children_of_node[node].size(); ++ i) { - size_t child = children_of_node[node][i]; - m_node_of_row[child] = m_parse_tree.append_child(node_iter, child); - node_queue.push(child); - } - } - -} - -const std::string Sentence::to_corpus_block() const -{ - ostringstream output_stream; - - size_t row_count = m_fields.size()-1; - - for (size_t row=1; row<=row_count; ++row) - { - // row ID - output_stream< &predicate_rows) -{ - m_predicates.clear(); - - for (size_t row=1; row<=m_row_count; ++row) - { - m_fields[row][FIELD_PRED] = "_"; - m_fields[row][FIELD_FILLPRED] = "_"; - } - for (size_t i=0; i(-1)); - - m_row_count = row_count; -} - diff --git a/src/srl/Sentence.h b/src/srl/Sentence.h deleted file mode 100644 index ac3e12d6e..000000000 --- a/src/srl/Sentence.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * File Name : Sentence.h - * Author : msmouse - * Create Time : 2006-12-31 - * Project Name : NewSRLBaseLine - * - * Updated by : jiangfeng - * Update Time : 2013-08-21 - */ - -#ifndef 
_SENTENCE_H_ -#define _SENTENCE_H_ - -#include -#include -#include "Configuration.h" -#include "tree.hh" -#include "boost/multi_array.hpp" - -#include - -typedef size_t RowID; -typedef tree SRLTree; - -class Predicate -{ - public: - - explicit Predicate(size_t _row) - : row(_row) {} - - size_t row; -}; - -class Sentence -{ - public: - Sentence() {} - - // a Sentence can be create from: - void from_corpus_block( - const std::vector &corpus_block); - // const Configuration& config); - - // a Sentence can be transformed to: - const std::string to_corpus_block() const; - - // set predicate - void set_predicates(const std::vector &predicate_rows); - - // set predicate (affects only FIELD_PRED, for predicate sense setting) - void set_PRED(const size_t row, const std::string &PRED); - - // clear the sentence - void clear(); - private: - Sentence(const Sentence &); - Sentence& operator=(const Sentence &); - - public: - enum FIELD - { - FIELD_FORM, FIELD_LEMMA, FIELD_PLEMMA, FIELD_POS, FIELD_PPOS, - FIELD_FEAT, FIELD_PFEAT, FIELD_HEAD, FIELD_PHEAD, FIELD_DEPREL, - FIELD_PDEPREL, FIELD_FILLPRED, FIELD_PRED, FIELD_NUMBER - }; // see CoNLL2009 shared task web site for field definitions - - public: - // get row count - const size_t get_row_count() const - { - return m_row_count; - } - - // get the parse tree node corresponding to a specified row - const SRLTree::iterator& get_node_of_row(const size_t row) const - { - return m_node_of_row[row]; - } - - // corpus field getter: - const std::string& get_field(const size_t row, const int field) const - { - return m_fields[row][field]; - } - - const std::string& get_FORM(const size_t row) const - { - return get_field(row, FIELD_FORM); - } - - const std::string& get_PLEMMA(const size_t row) const - { - return get_field(row, FIELD_PLEMMA); - } - - const std::string& get_PPOS(const size_t row) const - { - return get_field(row, FIELD_PPOS); - } - - const std::string& get_PFEAT(const size_t row) const - { - return get_field(row, 
FIELD_PFEAT); - } - - const std::string& get_PHEAD_str(const size_t row) const - { - return get_field(row, FIELD_HEAD); - } - - const size_t get_PHEAD(const size_t row) const - { - return (size_t)atoi(get_field(row, FIELD_PHEAD).c_str()); - } - - const size_t get_HEAD(const size_t row) const - { - return m_HEADs[row]; - } - - const std::string& get_PDEPREL(const size_t row) const - { - return get_field(row, FIELD_PDEPREL); - } - - const std::string& get_DEPREL(const size_t row) const - { - return get_field(row, FIELD_DEPREL); - } - - const std::string& get_FILLPRED(const size_t row) const - { - return get_field(row, FIELD_FILLPRED); - } - - const std::string& get_PRED(const size_t row) const - { - return get_field(row, FIELD_PRED); - } - - // get and set semantic arguments - const std::string& get_argument( - const size_t predicate_index, - const size_t row) const; - - void set_argument( - const size_t predicate_index, - const size_t row, - const std::string& argument_name); - - // get predicates - const std::vector& get_predicates() const - { - return m_predicates; - } - - // get predicates (reference) - std::vector& get_predicates() - { - return m_predicates; - } - - // get parse tree - const SRLTree& get_parse_tree() const - { - return m_parse_tree; - } - - private: - // make storage space for a specified number of rows - void resize_(const size_t row_count); - - private: - size_t m_row_count; - boost::multi_array m_fields; - std::vector m_predicates; - std::vector > m_argument_columns; - std::vector m_HEADs; - SRLTree m_parse_tree; - std::vector m_node_of_row; -}; - -#endif - diff --git a/src/srl/Srl/CMakeLists.txt b/src/srl/Srl/CMakeLists.txt new file mode 100644 index 000000000..bffd995ff --- /dev/null +++ b/src/srl/Srl/CMakeLists.txt @@ -0,0 +1,14 @@ +SET(SrlSrlSRC + config/SrlSrlConfig.h + model/SrlSrlModel.h + model/SrlSrlModel.cpp) + +add_executable(srl_srl_train train.cpp process/TrainSrlSrl.cpp ${SrlSrlSRC}) +target_link_libraries(srl_srl_train dynet 
${LIBS}) + +set_target_properties (srl_srl_train PROPERTIES + OUTPUT_NAME srl_srl_train + RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/) + +add_library(srl_srl_static_lib STATIC ${SrlSrlSRC}) +target_link_libraries(srl_srl_static_lib dynet ${LIBS}) diff --git a/src/srl/Srl/config/SrlSrlConfig.h b/src/srl/Srl/config/SrlSrlConfig.h new file mode 100644 index 000000000..212e695ba --- /dev/null +++ b/src/srl/Srl/config/SrlSrlConfig.h @@ -0,0 +1,69 @@ +// +// Created by liu on 2017-05-12. +// + +#ifndef Srl_Srl_CONFIG_H +#define Srl_Srl_CONFIG_H + +#include "config/ModelConf.h" + +class SrlSrlBaseConfig : public virtual ModelConf { +public: + unsigned word_dim; + unsigned emb_dim; + unsigned pos_dim; + unsigned rel_dim; + unsigned position_dim; + unsigned lstm_input_dim; + unsigned lstm_hidden_dim; + unsigned hidden_dim; + unsigned layers; + string embedding; + SrlSrlBaseConfig(string confName = "Configuration"): ModelConf(confName) { + registerConf("word_dim" , UNSIGNED, word_dim , "word dimension" , 100); + registerConf("emb_dim" , UNSIGNED, emb_dim , "embedding dimension" , 50); + registerConf("pos_dim" , UNSIGNED, pos_dim , "postag dimension" , 12); + registerConf("rel_dim" , UNSIGNED, rel_dim , "relation dimension" , 50); + registerConf("position_dim" , UNSIGNED, position_dim , "position dimension" , 5); + registerConf("lstm_input_dim" , UNSIGNED, lstm_input_dim , "lstm_input_dim" , 100); + registerConf("lstm_hidden_dim", UNSIGNED, lstm_hidden_dim , "lstm_hidden_dim" , 100); + registerConf("hidden_dim" , UNSIGNED, hidden_dim , "Hidden state dimension",100); + registerConf("layers" , UNSIGNED, layers , "lstm layers" , 1); + + registerConf ("embedding" , STRING, embedding , "word embedding file", ""); + } + + + friend class boost::serialization::access; + template + void serialize(Archive &ar, const unsigned int) { + ar & word_dim; + ar & emb_dim; + ar & pos_dim; + ar & rel_dim; + ar & position_dim; + ar & lstm_input_dim; + ar & lstm_hidden_dim; + ar & hidden_dim; + 
ar & layers; + } +}; + +class SrlSrlTrainConfig : public virtual SrlSrlBaseConfig, public virtual LabelModelTrainerConf { +public: + + SrlSrlTrainConfig(string confName = "Configuration"): + SrlSrlBaseConfig(confName), + LabelModelTrainerConf(confName) + { } +}; + +class SrlSrlPredConfig : public virtual SrlSrlBaseConfig, public virtual LabelModelPredictorConf { +public: + SrlSrlPredConfig(string confName = "Configuration"): + SrlSrlBaseConfig(confName), + LabelModelPredictorConf(confName) + { } +}; + +#endif //Srl_Srl_CONFIG_H diff --git a/src/srl/Srl/model/SrlSrlModel.cpp b/src/srl/Srl/model/SrlSrlModel.cpp new file mode 100644 index 000000000..019f2926f --- /dev/null +++ b/src/srl/Srl/model/SrlSrlModel.cpp @@ -0,0 +1,125 @@ +// +// Created by liu on 2017/5/21. +// + +#include "SrlSrlModel.h" + +vector SrlSrlModel::label(ComputationGraph &hg, SrlPiSample &samples) { + ctx_sent_lstm.newGraph(hg); stx_sent_lstm.newGraph(hg); stx_rel_lstm.newGraph(hg); + vector ans; + vector predicates = samples.getPredicateList(); + for (int j = 0; j < predicates.size(); ++j) { + vector one_pred_label = labelOnePredicate(hg, samples, predicates[j]); + ans.insert(ans.end(), one_pred_label.begin(), one_pred_label.end()); + } + return ans; +} + +vector SrlSrlModel::labelOnePredicate(ComputationGraph &hg, SrlPiSample &samples, int predIndex) { + vector sentList, relList; + Expression sent_root = activate(sentTransform.forward(hg, { + word_lookup.forward(hg, (unsigned) dict[WORD].convert(ROOT_MARK)), + dynet::expr::input(hg, {config.emb_dim}, emb_lookup.getEmb(ROOT_MARK)), + pos_lookup.forward(hg, (unsigned) dict[POS].convert(ROOT_MARK)), + })); + Expression rel_root = rel_lookup.forward(hg, (unsigned) dict[REL].convert(ROOT_MARK)); + for (int j = 0; j < samples.size(); ++j) { + vector sent; + if (config.word_dim) { + sent.push_back(word_lookup.forward(hg, (unsigned) dict[WORD].convert(samples.getWord(j).getWord()))); + } + if (config.emb_dim) { + sent.push_back(dynet::expr::input(hg, 
{config.emb_dim}, emb_lookup.getEmb(samples.getWord(j).getWord()))); + } + if (config.pos_dim) { + sent.push_back(pos_lookup.forward(hg, (unsigned) dict[POS].convert(samples.getWord(j).getPos()))); + } + sentList.push_back(activate(sentTransform.forward(hg, sent))); + + if (config.rel_dim) { + relList.push_back(rel_lookup.forward(hg, (unsigned) dict[REL].convert(samples.getWord(j).getRel()))); + } + } + ctx_sent_lstm.startNewSequence(); + vector sent_lstm_out_list = ctx_sent_lstm.forward(hg, sentList); + vector ans; + for (int j = 0; j < samples.size(); ++j) { + vector pred_path, arg_path; + getStnPath(samples, predIndex, j, pred_path, arg_path); + stx_sent_lstm.startNewSequence(); stx_rel_lstm.startNewSequence(); + Expression stx_sent_out = stx_sent_lstm.forwardBy2Order(hg, sentList, pred_path, arg_path, sent_root); + Expression stx_rel_out = stx_rel_lstm.forwardBy2Order(hg, relList, pred_path, arg_path, rel_root); + + Expression position = position_lookup.forward(hg, dict[POSITION].convert(samples.getWord(j).getPosition())); + Expression hidden = hiddenTransform.forward(hg, { + position, + sent_lstm_out_list[j], + sent_lstm_out_list[predIndex], + stx_sent_out, + stx_rel_out + }); + ans.push_back(softmax(resultTransform.forward(hg, {activate(hidden)}))); + } + return ans; +} + +void SrlSrlModel::getStnPath(SrlPiSample &samples, int predIndex, int argIndex, vector &predPath, + vector &argPath) { + vector is_on_pred_path(samples.size(), false); + predPath.resize(0); argPath.resize(0); + for (int p = predIndex; p != -1; p = samples.getWord(p).getParent()) { + is_on_pred_path[p] = true; + predPath.push_back(p); + } + predPath.push_back(-1); + + int nca; + for (nca = argIndex; nca != -1; nca = samples.getWord(nca).getParent()) { + if (is_on_pred_path[nca]) break; + argPath.push_back(nca); + } + argPath.push_back(nca); + + if (nca != -1) { + predPath.erase(find(predPath.begin(), predPath.end(), nca) + 1, predPath.end()); + } + return; +} + +void SrlSrlModel::init() { + 
// sent + vector sentDims; + if (config.word_dim) { + word_lookup = LookupModelBuilder(dict[WORD].size(), config.word_dim); word_lookup.init(model); + sentDims.push_back(config.word_dim); + } + if (config.emb_dim) { + sentDims.push_back(config.emb_dim); + } + if (config.pos_dim) { + pos_lookup = LookupModelBuilder(dict[POS].size(), config.pos_dim); pos_lookup.init(model); + sentDims.push_back(config.pos_dim); + } + + sentTransform = AffineTransformModelBuilder(sentDims, config.lstm_input_dim); sentTransform.init(model); + ctx_sent_lstm = BiLSTMModelBuilder(config.layers, config.lstm_input_dim, config.lstm_hidden_dim); ctx_sent_lstm.init(model); + + if (config.rel_dim) { + rel_lookup = LookupModelBuilder(dict[REL].size(), config.rel_dim); rel_lookup.init(model); + } + stx_sent_lstm = BiLSTMModelBuilder(config.layers, config.lstm_input_dim, config.lstm_hidden_dim); stx_sent_lstm.init(model); + stx_rel_lstm = BiLSTMModelBuilder(config.layers, config.rel_dim, config.lstm_hidden_dim); stx_rel_lstm.init(model); + + if (config.position_dim) { + position_lookup = LookupModelBuilder(dict[POSITION].size(), config.position_dim); position_lookup.init(model); + } + + hiddenTransform = AffineTransformModelBuilder({ + config.position_dim, + config.lstm_hidden_dim, // pred_sent + config.lstm_hidden_dim, // arg_sent + config.lstm_hidden_dim, // stx_sent + config.lstm_hidden_dim // stx_rel + }, config.hidden_dim); hiddenTransform.init(model); + resultTransform = AffineTransformModelBuilder({config.hidden_dim}, dict[ARG].size()); resultTransform.init(model); +} diff --git a/src/srl/Srl/model/SrlSrlModel.h b/src/srl/Srl/model/SrlSrlModel.h new file mode 100644 index 000000000..d226be1c0 --- /dev/null +++ b/src/srl/Srl/model/SrlSrlModel.h @@ -0,0 +1,87 @@ +// +// Created by liu on 2017-05-12. 
+// + +#ifndef PROJECT_STNLSTM_H +#define PROJECT_STNLSTM_H + +#include +#include +#include +#include "../config/SrlSrlConfig.h" +#include "structure/SrlPiSample.h" + + +// model builders +#include +#include +#include +#include +#include +#include + + +class SrlSrlModel : public PiSrlModel { + SrlSrlBaseConfig & config; + base::Debug debug; + // todo define ModelBuilders + WordEmbBuilder emb_lookup; + LookupModelBuilder word_lookup, pos_lookup, rel_lookup, position_lookup; + BiLSTMModelBuilder ctx_sent_lstm, stx_sent_lstm, stx_rel_lstm; + AffineTransformModelBuilder sentTransform, hiddenTransform, resultTransform; + + +public: + SrlSrlModel(SrlSrlBaseConfig &config) : + PiSrlModel(config), config(config), debug("SrlSrlModel") { } + + void initEmbedding(unordered_map > & emb) { + if (config.emb_dim) + emb_lookup.setEmb(emb); + } + void initEmbedding() { + if (config.emb_dim) + emb_lookup.loadEmb(config.embedding); + } + + void init(); + + virtual vector label(ComputationGraph &hg, SrlPiSample &samples); + + vector labelOnePredicate(ComputationGraph &hg, SrlPiSample &samples, int predIndex); + + void getStnPath(SrlPiSample &samples, int predIndex, int argIndex, vector& predPath, vector& argPath); + + virtual Expression + ExtractError(ComputationGraph &hg, vector &adists, SrlPiSample &samples, Performance &perf) { + // todo define your loss + vector err; + vector predicates = samples.getPredicateList(); + assert(samples.size() * predicates.size() == adists.size()); + int w_size = samples.size(), p_size = (int) predicates.size(); + for (int pi = 0; pi < p_size; pi++) { + for (int wi = 0; wi < w_size; ++wi) { + int god = dict[ARG].convert(samples.getWord(wi).getArgs()[pi]); + setPerf(perf, god, as_vector(hg.incremental_forward(adists[pi * w_size + wi])), dict[ARG].convert(NIL_LABEL)); + err.push_back(log(pick(adists[pi * w_size + wi], god))); + } + } + return -sum(err); + } + + virtual void ExtractResults(ComputationGraph &hg, vector &adists, SrlPiSample &samples) { + 
vector predicates = samples.getPredicateList(); + assert(samples.size() * predicates.size() == adists.size()); + int w_size = samples.size(), p_size = (int) predicates.size(); + for (int wi = 0; wi < w_size; ++wi) { + samples.getWord(wi).getArgs().resize((unsigned long) p_size); + for (int pi = 0; pi < p_size; pi++) { + int pred = getMaxId(as_vector(hg.incremental_forward(adists[pi * w_size + wi]))); + samples.getWord(wi).getArgs()[pi] = dict[ARG].convert(pred); + } + } + } +}; + + +#endif //PROJECT_STNLSTM_H diff --git a/src/srl/Srl/pred.cpp b/src/srl/Srl/pred.cpp new file mode 100644 index 000000000..0c11acce6 --- /dev/null +++ b/src/srl/Srl/pred.cpp @@ -0,0 +1,14 @@ +// +// Created by liu on 2017/4/7. +// + +#include "dynet/dynet.h" +#include "base/processLoader.h" +#include "process/PredSrlSrl.h" + +using namespace std; + +int main(int argc, char * argv[]) { + base::ProcessLoader processLoader(argc, argv); + return processLoader.runProcess(); +} diff --git a/src/srl/Srl/process/PredSrlSrl.cpp b/src/srl/Srl/process/PredSrlSrl.cpp new file mode 100644 index 000000000..fd226fee6 --- /dev/null +++ b/src/srl/Srl/process/PredSrlSrl.cpp @@ -0,0 +1,5 @@ +// +// Created by liu on 2017-05-12. +// + +#include "PredSrlSrl.h" diff --git a/src/srl/Srl/process/PredSrlSrl.h b/src/srl/Srl/process/PredSrlSrl.h new file mode 100644 index 000000000..19d5cba02 --- /dev/null +++ b/src/srl/Srl/process/PredSrlSrl.h @@ -0,0 +1,57 @@ +// +// Created by liu on 2017-05-12. 
+// + +#ifndef PROJECT_PREDSTNLSTM_H +#define PROJECT_PREDSTNLSTM_H + +#include +#include +#include "../config/SrlSrlConfig.h" +#include "structure/SrlPiSample.h" +#include "../model/SrlSrlModel.h" +#include "extractor/ConverterDataToSrlPiSample.h" + +using namespace std; + +class PredSrlSrl : public LabelModelSGDSeqPredictor{ + SrlSrlPredConfig & config; + SrlSrlModel model; + ConverterDataToSrlPiSample conv_toSample; + extractor::ConverterMultiLineFileReader fileReader; +public: + PredSrlSrl(SrlSrlPredConfig &config) + : LabelModelSGDSeqPredictor(config, model), + config(config), model(config) {} + + virtual void init() { + + // todo prepair + // todo 1. prepair testSamples (to fill 'testSamples') + initSample(testSamples, config.test_data); + model.loadDict(); // load dict + model.init(); // init parameters + model.load(); // load model + model.initEmbedding(); + } + + virtual void extractResult() { + conv_toSample.iconv(&testSamples); + fileReader.reWriteFile(config.output); + } + +private: + + void initSample(vector& samples, string file) { + vector fileName = {file}; + fileReader.init(fileName); + fileReader.run(); + + conv_toSample.init(fileReader.getResult()); + conv_toSample.run(); + samples = conv_toSample.getResult(); + } +}; + + +#endif //PROJECT_PREDSTNLSTM_H diff --git a/src/srl/Srl/process/TrainSrlSrl.cpp b/src/srl/Srl/process/TrainSrlSrl.cpp new file mode 100644 index 000000000..8f76be1e4 --- /dev/null +++ b/src/srl/Srl/process/TrainSrlSrl.cpp @@ -0,0 +1,5 @@ +// +// Created by liu on 2017-05-12. +// + +#include "TrainSrlSrl.h" diff --git a/src/srl/Srl/process/TrainSrlSrl.h b/src/srl/Srl/process/TrainSrlSrl.h new file mode 100644 index 000000000..8b19771fc --- /dev/null +++ b/src/srl/Srl/process/TrainSrlSrl.h @@ -0,0 +1,54 @@ +// +// Created by liu on 2017-05-12. 
+// + +#ifndef PROJECT_TRAINSTNLSTM_H +#define PROJECT_TRAINSTNLSTM_H + +#include "process/LabelModelSGDSeqTrainer.h" +#include "../config/SrlSrlConfig.h" +#include "structure/SrlPiSample.h" +#include "../model/SrlSrlModel.h" +#include "extractor/ConverterMultiLineFileReader.h" +#include "extractor/ConverterDataToSrlPiSample.h" +#include "extractor/ExtractorFileToWordEmb.h" + + +class TrainSrlSrl : public LabelModelSGDSeqTrainer { + SrlSrlTrainConfig &config; + SrlSrlModel model; +public: + TrainSrlSrl(SrlSrlTrainConfig &config) + : LabelModelSGDSeqTrainer(config, model), + config(config), model(config) + {} + + void init() { + initSample(trainSamples, config.training_data); + initSample(devSamples, config.dev_data); + + model.registerDict(trainSamples); + + model.initEmbedding(); + model.init(); // init model size + model.load(); + + } + +private: + void initSample(vector & samples, string file) { + vector fileName = {file}; + extractor::ConverterMultiLineFileReader fileReader; + fileReader.init(fileName); + fileReader.run(); + + ConverterDataToSrlPiSample conv_toSample; + conv_toSample.init(fileReader.getResult()); + conv_toSample.run(); + samples = conv_toSample.getResult(); + } + +}; + + +#endif //PROJECT_TRAINSTNLSTM_H diff --git a/src/srl/Srl/train.cpp b/src/srl/Srl/train.cpp new file mode 100644 index 000000000..d54b1cac6 --- /dev/null +++ b/src/srl/Srl/train.cpp @@ -0,0 +1,14 @@ +// +// Created by liu on 2017/4/7. 
+// + +#include "dynet/dynet.h" +#include "base/processLoader.h" +#include "process/TrainSrlSrl.h" + +using namespace std; + +int main(int argc, char * argv[]) { + base::ProcessLoader processLoader(argc, argv); + return processLoader.runProcess(); +} diff --git a/src/srl/common/CMakeLists.txt b/src/srl/common/CMakeLists.txt new file mode 100644 index 000000000..598e4ccee --- /dev/null +++ b/src/srl/common/CMakeLists.txt @@ -0,0 +1,14 @@ +set(CommonSRC + model/AffineTransformModelBuilder.cpp + model/CNN1dLayerBuilder.cpp + process/LabelModelSGDSeqTrainer.h + model/PiSrlModel.h + structure/SrlPiSample.cpp + structure/WordEmbBuilder.h + model/RNNModelBuilder.h model/BiRNNModelBuilder.h) + + +add_library(common_static_lib STATIC ${CommonSRC}) +target_link_libraries(common_static_lib ${LIBS}) +#add_library(common_shared_lib SHARED ${CommonSRC}) +#target_link_libraries(common_shared_lib ${LIBS}) \ No newline at end of file diff --git a/src/srl/common/Const.h b/src/srl/common/Const.h new file mode 100644 index 000000000..860562d9d --- /dev/null +++ b/src/srl/common/Const.h @@ -0,0 +1,21 @@ +// +// Created by liu on 2017/4/7. +// + +#ifndef PROJECT_CONST_H +#define PROJECT_CONST_H + +#include "string" +#include "vector" +using namespace std; +const string BLANK_WORD = ""; +const string UNK_WORD = ""; +const string NIL_LABEL = "_"; +const string PRED_LABEL = "Y"; +const string ROOT_MARK = ""; + + +const string S_QTY_ARG = "QTY"; +const vector S_QTY_POS_PAT = {"ad", "cd", "m", "q"}; + +#endif //PROJECT_CONST_H \ No newline at end of file diff --git a/src/srl/common/config/ModelConf.h b/src/srl/common/config/ModelConf.h new file mode 100644 index 000000000..c0bddbe98 --- /dev/null +++ b/src/srl/common/config/ModelConf.h @@ -0,0 +1,84 @@ +// +// Created by liu on 2017/2/22. 
+// + +#ifndef PROJECT_MODELCONF_H +#define PROJECT_MODELCONF_H + +#include "base/config.h" +#include "base/debug.h" + +class DynetConf : virtual public base::DebugConfig { +public: + int dynet_gpus; + string dynet_mem; + string dynet_gpu_ids; + unsigned dynet_seed; + DynetConf(string confName = "Configuration"): base::DebugConfig(confName) { + registerConf("dynet-mem", STRING, dynet_mem, "", "1000"); + registerConf ("dynet-seed", UNSIGNED, dynet_seed, "dynet_seed", 0); + registerConf("dynet-gpus", INT, dynet_gpus, "", -1); + registerConf("dynet-gpu-ids", STRING, dynet_gpu_ids, "", "0"); + } + +}; + +class ModelConf : virtual public DynetConf { +public: + + string model; + string activate; + + ModelConf(string confName = "Configuration"): DynetConf(confName) { + registerConf ("model,m" , STRING , model , "model path" ); + registerConf ("activate" , STRING , activate , "activate" , "rectify"); + } + +}; + + +class LabelModelTrainerConf : virtual public ModelConf { +public: + + string training_data; + string dev_data; + float et0; + float eta_decay; + float best_perf_sensitive; + unsigned max_iter; + unsigned batch_size; + unsigned batches_to_save; + + bool use_dropout; + float dropout_rate; + + int use_auto_stop; + + LabelModelTrainerConf(string confName = "Configuration"): ModelConf(confName) { + registerConf ("training_data,T" , STRING , training_data , "Training corpus" ); + registerConf ("dev_data,d" , STRING , dev_data , "Development corpus" ); + registerConf ("learning_rate" , FLOAT , et0 , "learning rate" ,0.1); + registerConf ("eta_decay" , FLOAT , eta_decay , "eta_decay" ,0.08); + registerConf ("best_perf_sensitive", FLOAT, best_perf_sensitive, "min f upgrade to save model",0.00); + registerConf("max_iter" , UNSIGNED, max_iter , "max training iter(batches)",5000); + registerConf("batch_size" , UNSIGNED, batch_size , "batch_size" ,1000); + registerConf("batches_to_save" , UNSIGNED, batches_to_save,"after x batches to save model",10); + + registerConf 
("use_dropout" , BOOL , use_dropout , "Use dropout" ); + registerConf ("dropout_rate" , FLOAT , dropout_rate , "dropout rate" ,0.5); + + registerConf ("use_auto_stop" , INT , use_auto_stop , "Use auto stop" , 0); + } +}; + +class LabelModelPredictorConf : virtual public ModelConf { +public: + string test_data; + string output; + LabelModelPredictorConf(string confName = "Configuration"): ModelConf(confName) { + registerConf ("test_data,p" , STRING , test_data , "Test corpus" ); + registerConf ("output,o" , STRING , output , "Testing output labels" ); + } +}; + +#endif //PROJECT_MODELCONF_H diff --git a/src/srl/common/extractor/ConverterDataToSrlPiSample.h b/src/srl/common/extractor/ConverterDataToSrlPiSample.h new file mode 100644 index 000000000..94f551ee8 --- /dev/null +++ b/src/srl/common/extractor/ConverterDataToSrlPiSample.h @@ -0,0 +1,53 @@ +// +// Created by liu on 2017-05-12. +// + +#ifndef Srl_CONVENTERDATATO_Pi_SAMPLE_H +#define Srl_CONVENTERDATATO_Pi_SAMPLE_H + +#include "extractor/BiConverter.h" +#include "structure/DataFileBlockContext.h" +#include "../structure/SrlPiSample.h" +using namespace extractor; + +class ConverterDataToSrlPiSample : public BiConverter{ +public: + ConverterDataToSrlPiSample() {} + + virtual void convert(DataFileBlockContext &t1) { + SrlPiSample sample; + for (int j = 0; j < t1.data.size(); ++j) { + sample.push_back(convertLineToWord(t1.data[j])); + } + if (sample.getPredicateList().size()) { + data.push_back(sample); + } + } + + inline Word convertLineToWord(vector & line) { + // 0 1 2 3 4 5 6 7 8 9 0 11 1213 14 + // 4 有人 有人 有人 r r _ _ 5 5 SBV SBV Y 有人.01 A1 _ _ _ _ _ _ _ + int index = lexical_cast(line[0]) - 1; + int parent = lexical_cast(line[8]) - 1; + vector labels; + for (int j = 14 /*14 位置开始论元标号*/; j < line.size(); ++j) { + labels.push_back(line[j]); + } + Word word(index, line[1], line[4], parent, line[10] ,(index <= parent ? 
"before" : "after"), line[12], labels); + return word; + } + + virtual void iconvOne(DataFileBlockContext &t1, SrlPiSample &t2, int innerIndex) { + assert(t1.data.size() == t2.size()); + for (int j = 0; j < t2.size(); ++j) { + vector& line = t1.data[j]; + line[12] = t2.getWord(j).getPredicate(); + line[13] = (line[12] == PRED_LABEL ? line[1] + ".01": NIL_LABEL); + line.erase(line.begin() + 14, line.end()); + line.insert(line.end(), t2.getWord(j).getArgs().begin(), t2.getWord(j).getArgs().end()); + } + } + +}; + +#endif //Srl_CONVENTERDATATO_Pi_SAMPLE_H diff --git a/src/srl/common/extractor/ConverterFileContextToWordEmb.h b/src/srl/common/extractor/ConverterFileContextToWordEmb.h new file mode 100644 index 000000000..ff38e24bf --- /dev/null +++ b/src/srl/common/extractor/ConverterFileContextToWordEmb.h @@ -0,0 +1,65 @@ +// +// Created by liu on 2017/1/4. +// + +#ifndef PROJECT_CONVERTERFILECONTEXTTOWORDEMB_H +#define PROJECT_CONVERTERFILECONTEXTTOWORDEMB_H + +#include "structure/DataFileContext.h" +#include "extractor/AbstractConverter.h" +#include "base/debug.h" +#include "unordered_map" +#include "vector" +#include "base/progressBar.h" +#include "boost/lexical_cast.hpp" + +using boost::lexical_cast; +using namespace std; +using namespace extractor; + +typedef unordered_map > WordEmb; + +class ConverterFileContextToWordEmb : public AbstractConverter, WordEmb> { +public: + WordEmb data; + vector * origin; + base::Debug debug; + + ConverterFileContextToWordEmb(): debug(ConverterFileContextToWordEmb::getClassName()) {} + virtual void init(vector & origin_data) { + origin = & origin_data; + } + + virtual void run() { + int size = origin->size(); + debug.debug("Convert 'DataFileContext' to 'WordEmb' start. 
total %u lines", size); + base::ProgressBar bar(size); + for (int i = 0; i < size; ++i) { + convert((*origin)[i]); + if (bar.updateLength(i + 1)) debug.info("(%d)%s lines is converted %d data generate.", i + 1, bar.getProgress(i + 1).c_str() , data.size()); + } + debug.debug("Convert 'DataFileContext' to 'WordEmb' finish. generate %d WordEmb(s)", data.size()); + } + + virtual void convert(DataFileContext & line) { + if (line.data.size() < 5) + return; + data[line.data[0]] = vector(); + vector & vec = data[line.data[0]]; + for (int j = 1; j < line.data.size(); ++j) { + if (line.data[j] != "") + vec.push_back(lexical_cast(line.data[j])); + } + }; + + virtual WordEmb & getResult() { + return this->data; + } + + static string getClassName() { + return "Converter "; + } +}; + + +#endif //PROJECT_CONVERTERFILECONTEXTTOWORDEMB_H diff --git a/src/srl/common/extractor/ExtractorFileToWordEmb.h b/src/srl/common/extractor/ExtractorFileToWordEmb.h new file mode 100644 index 000000000..cbb40e354 --- /dev/null +++ b/src/srl/common/extractor/ExtractorFileToWordEmb.h @@ -0,0 +1,45 @@ +// +// Created by liu on 2017/1/4. 
+// + +#ifndef PROJECT_EXTRACTORFILETOWORDEMB_H +#define PROJECT_EXTRACTORFILETOWORDEMB_H + +#include "extractor/AbstractExtractor.h" +#include "structure/DataFileName.h" +#include "ConverterFileContextToWordEmb.h" +#include "extractor/ConverterFileReader.h" +#include "base/debug.h" +using namespace extractor; +using namespace std; + +class ExtractorFileToWordEmb: public AbstractExtractor { +public: + DataFileName * startPtr; + base::Debug debug; + DataFileName file; + ExtractorFileToWordEmb() : debug("ExtractorFileToWordEmb") { } + + void init(const string& fileName) { + file = DataFileName(fileName); + init(file); + } + + void init(DataFileName &start) { + startPtr = & start; + } + WordEmb run() { + vector fileName = {*(startPtr)}; + ConverterFileReader fileReader; + ConverterFileContextToWordEmb fileContextToWordEmb; + fileReader.init(fileName); + fileReader.run(); + fileContextToWordEmb.init(fileReader.getResult()); + fileContextToWordEmb.run(); + WordEmb res = fileContextToWordEmb.getResult(); + return res; + }; +}; + + +#endif //PROJECT_EXTRACTORFILETOWORDEMB_H diff --git a/src/srl/common/model/AffineTransformModelBuilder.cpp b/src/srl/common/model/AffineTransformModelBuilder.cpp new file mode 100644 index 000000000..1002e8cc6 --- /dev/null +++ b/src/srl/common/model/AffineTransformModelBuilder.cpp @@ -0,0 +1,17 @@ +// +// Created by liu on 2017/5/11. 
+// + +#include "AffineTransformModelBuilder.h" + + +Expression AffineTransformModelBuilder::forward(dynet::ComputationGraph &hg, const vector& features) { + assert(features.size() == mulParams.size()); + vector tranParams; + tranParams.push_back(parameter(hg, bias)); + for (int i = 0; i < mulParams.size(); ++i) { + tranParams.push_back(parameter(hg, mulParams[i])); + tranParams.push_back(features[i]); + } + return affine_transform(tranParams); +} \ No newline at end of file diff --git a/src/srl/common/model/AffineTransformModelBuilder.h b/src/srl/common/model/AffineTransformModelBuilder.h new file mode 100644 index 000000000..73c68da9b --- /dev/null +++ b/src/srl/common/model/AffineTransformModelBuilder.h @@ -0,0 +1,67 @@ +// +// Created by liu on 2017/2/20. +// + +#ifndef TTPLAB_AFFINETRANSFORMMODELBUILDER_H +#define TTPLAB_AFFINETRANSFORMMODELBUILDER_H + +#include "./ModelBuilder.h" +#include "dynet/nodes.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/timing.h" +#include "dynet/rnn.h" +#include "dynet/gru.h" +#include "dynet/lstm.h" +#include "dynet/dict.h" +#include +#include +#include "vector" + +using namespace dynet; +using namespace dynet::expr; +using namespace std; + +/** + * 输出表达式 = (输入表达式 * 类内参数)+ 偏移 + * 注意顺序 + * 对象命名含有顺序 + */ +class AffineTransformModelBuilder: public ModelBuilder, Expression> { + vector inputDims; + unsigned outDim; + Parameter bias; + vector mulParams; +public: + AffineTransformModelBuilder(vector inputDims = {}, unsigned outDim = 0): + inputDims(inputDims), + outDim(outDim) + { } + + void setInputDims(const vector &inputDims) { + AffineTransformModelBuilder::inputDims = inputDims; + } + + void setOutDim(unsigned int outDim) { + AffineTransformModelBuilder::outDim = outDim; + } + + virtual void init(dynet::Model &model) { + assert(inputDims.size() > 0); + assert(outDim > 0); + bias = model.add_parameters({outDim}); + for (int i = 0; i < inputDims.size(); ++i) { + 
mulParams.push_back(model.add_parameters({outDim, inputDims[i]})); + } + } + + virtual Expression forward(dynet::ComputationGraph &hg, const vector& features); + + virtual vector _debug_get_para(dynet::ComputationGraph &hg) { + return as_vector(hg.incremental_forward(parameter(hg, bias))); + } + +}; + + +#endif //TTPLAB_AFFINETRANSFORMMODELBUILDER_H diff --git a/src/srl/common/model/BaseLabelModel.h b/src/srl/common/model/BaseLabelModel.h new file mode 100644 index 000000000..dbb74088d --- /dev/null +++ b/src/srl/common/model/BaseLabelModel.h @@ -0,0 +1,278 @@ +// +// Created by liu on 2017/2/22. +// + +#ifndef PROJECT_BASELABELMODEL_H +#define PROJECT_BASELABELMODEL_H + + +#include "base/debug.h" +#include "dynet/dict.h" +#include +#include +#include "dynet/expr.h" +#include "structure/Performance.h" +#include "structure/Prediction.h" +#include "config/ModelConf.h" +#include "Const.h" + +#include +#include +#include +#include +#include +#include "model/LookupModelBuilder.h" + +using namespace dynet; +using namespace dynet::expr; + +template +class BaseLabelModel : public model::Model { + ModelConf& config; + base::Debug debug; +public: + dynet::Model model; + vector dict; + float dropout_rate = 0; // 默认关闭dropout + + BaseLabelModel(ModelConf &config) : config(config), debug("BaseLabelModel") { } + + void save() { + ofstream out(config.model); + boost::archive::text_oarchive oa(out); + oa << dict; oa << model; + out.close(); + debug.debug("model saved in '%s'", config.model.c_str()); + } + + void save(boost::archive::binary_oarchive & oa) { + saveDict(oa); saveModel(oa); + } + + void saveDict(boost::archive::binary_oarchive & oa) { + oa << dict; + debug.debug("dict saved in '%s'", config.model.c_str()); + } + + void saveModel(boost::archive::binary_oarchive & oa) { + oa << model; + debug.debug("model saved in '%s'", config.model.c_str()); + } + + bool load(boost::archive::binary_iarchive & ia) { + return loadDict(ia) && loadModel(ia); + } + + bool 
loadDict(boost::archive::binary_iarchive & ia) { + ia >> dict; + debug.debug("dict loaded in '%s'", config.model.c_str()); + return true; + } + + bool loadModel(boost::archive::binary_iarchive & ia) { + ia >> model; + debug.debug("model loaded in '%s'", config.model.c_str()); + return true; + } + + bool isModelExist() { + ifstream in(config.model); + if (!in) { + in.close(); + return false; + } + in.close(); + return true; + } + + bool load() { + ifstream in(config.model); + if (!in) { + debug.debug("no model found in '%s'", config.model.c_str()); + return false; + } + model = dynet::Model(); + debug.debug("load model in '%s'", config.model.c_str()); + boost::archive::text_iarchive ia(in); + ia >> dict; ia >> model; + in.close(); + return true; + } + + bool loadDict() { + ifstream in(config.model); + if (!in) { + debug.debug("no model found in '%s'", config.model.c_str()); + return false; + } + debug.debug("load dict in '%s'", config.model.c_str()); + boost::archive::text_iarchive ia(in); + ia >> dict; + in.close(); + return true; + } + + void setDropOut(float dropout_rate) { + this->dropout_rate = dropout_rate; + } + + void freezeDict() { + for (int j = 0; j < dict.size(); ++j) { + debug.debug("dict[%d] is frozen at size %u", j, dict[j].size()); + dict[j].freeze(); + } + } + +// 工具函数 +public: + void initParameter(unordered_map >& emb, + LookupModelBuilder & lookupParameter, + dynet::Dict & dict + ) { + double emb_size = emb.size(); + double word_size = dict.size(); + double fill_size = 0; + for (unsigned j = 0; j < dict.size(); ++j) { + if (emb.find(dict.convert(j)) != emb.end()) { + lookupParameter.initialize(j, emb[dict.convert(j)]); + fill_size ++; + } + } + debug.debug("Fill lookup parameter(%.0lf) with emb(%.0lf), (%.0lf %lf.1%%) filled.", word_size, emb_size, fill_size, fill_size/word_size * 100); + } + + /** + * fill parameter with emb + * note: all emb will fill to parameter, this may be expand the dict + * @param emb + * @param lookupParameter + * @param 
dict + */ + void initParameterAllEmb( + unordered_map >& emb, + LookupModelBuilder & lookupParameter, + dynet::Dict & dict + ) { + int emb_size = emb.size(); + assert(lookupParameter.getInputDim() == emb.size()); + lookupParameter.setInputDim(emb_size); + double fill_size = 0; + for (auto i = emb.begin(); i != emb.end(); i++) { + dict.convert(i->first); + lookupParameter.initialize(dict.convert(i->first), i->second); + fill_size ++; + } + int word_size = dict.size(); + debug.debug("Expand Fill lookup parameter(%d) with emb(%d), (%.0lf %.2lf%%) filled.", word_size, emb_size, fill_size, fill_size / word_size * 100); + + } + +protected: +/** + * 统计信息,更新performance + * @param perf + * @param god {keep the nil label = 0} + * @param adist + */ + void setPerf(Performance & perf, int god, const vector& adist, int nil = 0) { + // evaluate 概率最大标签概率 max_prob 和 概率最大标签 max_idx + int max_idx = getMaxId(adist); + if (god != nil) perf.n_arg += 1; + if (max_idx != nil) { + perf.n_parg += 1; + if (max_idx == god) + perf.tp += 1; + } + } + + int getMaxId(const vector& adist) { + double max_prob = adist[0]; + unsigned max_idx = 0; + for (unsigned i = 1; i < adist.size(); ++i) { + if (adist[i] > max_prob) { + max_prob = adist[i]; + max_idx = i; + } + } + return max_idx; + } + +/** + * 输出预测比较统计 + */ + struct cmp_outcome { + bool operator()(const pair& lpr, + const pair& rpr) const { + return lpr.second > rpr.second; + } + }; + + virtual Prediction extractPrediction(vector probs) { + Prediction prediction; + for (unsigned i = 0; i < probs.size(); ++i) { + prediction.push_back(make_pair(i, (double)probs[i])); + } + sort(prediction.begin(), prediction.end(), cmp_outcome()); + return prediction; + } + + + /** + * 激活函数 + * @param expr + * @return + */ + virtual Expression activate(dynet::expr::Expression expr) { + dynet::expr::Expression nl_hidden; + if (config.activate == "tanh") { + nl_hidden = tanh(expr); + }else if (config.activate == "cube"){ + nl_hidden = cube(expr); + } else { + 
nl_hidden = rectify(expr); + } + if (dropout_rate > 1e-7) { + nl_hidden = dropout(nl_hidden, dropout_rate); + } + return nl_hidden; + } + + /** + * 绑定两个列表中的每一项 + * + * @param exprList1 + * @param exprList2 + * @return + */ + virtual vector concatenate(vector exprList1, vector exprList2) { + assert(exprList1.size() == exprList2.size()); + vector res; + for (int i = 0; i < exprList1.size(); ++i) { + res.push_back(dynet::expr::concatenate({exprList1[i], exprList2[i]})); + } + return res; + } + +/** + * 列表按照顺序取出 Expression + * + * @param exprList1 + * @param exprList2 + * @param outOfIndex 用于ROOT的表示 + * @return + */ + virtual vector lookUpExprList(vector exprList, vector& indexList, Expression& outOfIndex) { + vector res; + for (int i = 0; i < indexList.size(); ++i) { + if (indexList[i] < 0) { + res.push_back(outOfIndex); + } else { + res.push_back(exprList[indexList[i]]); + } + } + return res; + } +}; + + +#endif //PROJECT_BASELABELMODEL_H diff --git a/src/srl/common/model/BiRNNModelBuilder.h b/src/srl/common/model/BiRNNModelBuilder.h new file mode 100644 index 000000000..bf808c103 --- /dev/null +++ b/src/srl/common/model/BiRNNModelBuilder.h @@ -0,0 +1,156 @@ +// +// Created by liu on 2017/5/22. 
+// + +#ifndef BILSTM_SRL_BIRNNMODELBUILDER_H +#define BILSTM_SRL_BIRNNMODELBUILDER_H + +#include "./ModelBuilder.h" +#include "./RNNModelBuilder.h" +#include "dynet/nodes.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/timing.h" +#include "dynet/rnn.h" +#include "dynet/gru.h" +#include "dynet/lstm.h" +#include "dynet/dict.h" +#include +#include +#include "vector" + +using namespace dynet; +using namespace dynet::expr; +using namespace std; + +template class BiRNNModelBuilder; +typedef BiRNNModelBuilder BiLSTMModelBuilder; +typedef BiRNNModelBuilder BiGRUModelBuilder; +typedef BiRNNModelBuilder BiSimpleRNNModelBuilder; + +template +class BiRNNModelBuilder : public ModelBuilder, vector> { + unsigned layers; + unsigned inputDim; + unsigned outputDim; + RNNModelBuilder forwardRNN; + RNNModelBuilder backwardRNN; + Parameter begin; + Parameter end; +public: + BiRNNModelBuilder(unsigned layers = 0, unsigned inputDim = 0, unsigned outputDim = 0): + layers(layers), + inputDim(inputDim), + outputDim(outputDim), + forwardRNN(layers, inputDim, outputDim/2), + backwardRNN(layers, inputDim, outputDim/2) + { + assert(outputDim % 2 == 0); + } + + void setLayers(unsigned int layers) { + BiRNNModelBuilder::layers = layers; + forwardRNN.setLayers(layers); + backwardRNN.setLayers(layers); + } + + void setInputDim(unsigned int inputDim) { + BiRNNModelBuilder::inputDim = inputDim; + forwardRNN.setInputDim(inputDim); + backwardRNN.setInputDim(inputDim); + } + + void setOutputDim(unsigned int outputDim) { + BiRNNModelBuilder::outputDim = outputDim; + forwardRNN.setOutputDim(outputDim); + backwardRNN.setOutputDim(outputDim); + } + + virtual void init(Model &model) { + assert(layers > 0); + assert(inputDim > 0); + assert(outputDim > 0); + begin = model.add_parameters({inputDim}); + end = model.add_parameters({inputDim}); + forwardRNN.init(model, begin, end); + backwardRNN.init(model, begin, end); // use backward function will handle this. 
+ } + + void dropOut(float d) { + forwardRNN.dropOut(d); + backwardRNN.dropOut(d); + } + + void disableDropOut() { + forwardRNN.disableDropOut(); + backwardRNN.disableDropOut(); + } + + void newGraph(ComputationGraph &cg) { + forwardRNN.newGraph(cg); + backwardRNN.newGraph(cg); + } + void startNewSequence(vector h_0 = {}) { + forwardRNN.startNewSequence(h_0); + backwardRNN.startNewSequence(h_0); + } + /** + * + * @param hg + * @param aClass + * @return A 2*outputDim Dim Expression + */ + virtual vector forward(dynet::ComputationGraph &hg, const vector& inputList) { + vector res; + vector fw = forwardRNN.forward(hg, inputList); + vector bw = backwardRNN.backward(hg, inputList); + int size = (int) fw.size(); + for (int i = 0; i < size; ++i) { + res.push_back(concatenate({fw[i], bw[i]})); + } + return res; + } + + virtual Expression forwardBack(dynet::ComputationGraph &hg, vector inputList) { + return concatenate({ + forwardRNN.forwardBack(hg, inputList), + backwardRNN.backwardBack(hg, inputList) + }); + } + + virtual Expression forwardBy2Order(dynet::ComputationGraph &hg, vector inputList, vector order1, vector order2) { + return concatenate({ + forwardRNN.forwardByOrder(hg, inputList, order1), + backwardRNN.forwardByOrder(hg, inputList, order2) + }); + } + virtual Expression forwardBy2Order(dynet::ComputationGraph &hg, vector inputList, vector order1, vector order2, Expression& escape) { + return concatenate({ + forwardRNN.forwardByOrder(hg, inputList, order1, escape), + backwardRNN.forwardByOrder(hg, inputList, order2, escape) + }); + } + + virtual Expression forwardBackBy2Order(dynet::ComputationGraph &hg, vector inputList, vector order1, vector order2) { + return concatenate({ + forwardRNN.forwardBackByOrder(hg, inputList, order1), + backwardRNN.forwardBackByOrder(hg, inputList, order2) + }); + } + virtual Expression forwardBackBy2Order(dynet::ComputationGraph &hg, vector inputList, vector order1, vector order2, Expression& escape) { + return concatenate({ + 
forwardRNN.forwardBackByOrder(hg, inputList, order1, escape), + backwardRNN.forwardBackByOrder(hg, inputList, order2, escape) + }); + } + + virtual Expression forwardBackBy2Path(dynet::ComputationGraph &hg, vector fwPath, vector bwPath) { + return concatenate({ + forwardRNN.forwardBack(hg, fwPath), + backwardRNN.forwardBack(hg, bwPath) + }); + } +}; + + +#endif //BILSTM_SRL_BIRNNMODELBUILDER_H diff --git a/src/srl/common/model/CNN1dLayerBuilder.cpp b/src/srl/common/model/CNN1dLayerBuilder.cpp new file mode 100644 index 000000000..549b93ac3 --- /dev/null +++ b/src/srl/common/model/CNN1dLayerBuilder.cpp @@ -0,0 +1,53 @@ +// +// Created by liu on 2017/4/7. +// + +#include "CNN1dLayerBuilder.h" + +CNN1dLayerBuilder::CNN1dLayerBuilder(int in_rows, int k_fold_rows, int filter_width, int in_nfmaps, int out_nfmaps, int out_length) + : in_rows(in_rows), k_fold_rows(k_fold_rows), filter_width(filter_width), + in_nfmaps(in_nfmaps), out_nfmaps(out_nfmaps), out_length(out_length) { + + if (k_fold_rows < 1 || ((in_rows / k_fold_rows) * k_fold_rows != in_rows)) { + cerr << "Bad k_fold_rows=" << k_fold_rows << endl; + abort(); + } +} + +void CNN1dLayerBuilder::init(dynet::Model &model) { + p_filts.resize(in_nfmaps); p_fbias.resize(in_nfmaps); + for (int i = 0; i < in_nfmaps; ++i) { + p_filts[i].resize((unsigned long) out_nfmaps); + p_fbias[i].resize((unsigned long) out_nfmaps); + for (int j = 0; j < out_nfmaps; ++j) { + p_filts[i][j] = model.add_parameters({(unsigned)in_rows, (unsigned)filter_width}, 0.01); + p_fbias[i][j] = model.add_parameters({(unsigned)in_rows}, 0.05); + } + } +} + +vector +CNN1dLayerBuilder::forward(dynet::ComputationGraph &cg, const vector &inlayer) { + const unsigned out_nfmaps = (const unsigned int) p_filts.front().size(); + const unsigned in_nfmaps = (const unsigned int) p_filts.size(); + if (in_nfmaps != inlayer.size()) { + cerr << "Mismatched number of input features (" << inlayer.size() << "), expected " << in_nfmaps << endl; + abort(); + } + vector 
r(out_nfmaps); + + vector tmp(in_nfmaps); + for (unsigned fj = 0; fj < out_nfmaps; ++fj) { + for (unsigned fi = 0; fi < in_nfmaps; ++fi) { + Expression t = conv2d(inlayer[fi], parameter(cg, p_filts[fi][fj]), {1, 1}); + t = colwise_add(t, parameter(cg, p_fbias[fi][fj])); + tmp[fi] = t; + } + Expression s = sum(tmp); + if (k_fold_rows > 1) + s = fold_rows(s, (unsigned int) k_fold_rows); + s = kmax_pooling(s, (unsigned int) out_length); + r[fj] = rectify(s); + } + return r; +} diff --git a/src/srl/common/model/CNN1dLayerBuilder.h b/src/srl/common/model/CNN1dLayerBuilder.h new file mode 100644 index 000000000..f0cf59a35 --- /dev/null +++ b/src/srl/common/model/CNN1dLayerBuilder.h @@ -0,0 +1,52 @@ +// +// Created by liu on 2017/4/7. +// + +#ifndef PROJECT_CNNLAYERBUILDER_H +#define PROJECT_CNNLAYERBUILDER_H + +#include "ModelBuilder.h" +#include "dynet/nodes.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/timing.h" +#include "dynet/rnn.h" +#include "dynet/gru.h" +#include "dynet/lstm.h" +#include "dynet/dict.h" +#include +#include +#include "vector" + +using namespace dynet; +using namespace dynet::expr; +using namespace std; + +/** + * CNN 卷积+pooling层构造器 + * 从Expression数组(一组图像)到Expression数组(另一组图像(卷积+pooling之后)) + * + * in_rows 输入序列每个元素的向量维度 + * k_fold 1 no folding, 2 fold two rows together, 3 ... 
折叠在一起的目的是一起kmax_pooling + * filter_width 卷积核宽度 + * in_nfmaps 输入张数 + * out_nfmaps 输出张数 + * out_length 输出长度 (输出宽度=in_rows/k_fold) + */ +class CNN1dLayerBuilder: public ModelBuilder, vector> { +protected: + int in_rows, k_fold_rows, filter_width, in_nfmaps, out_nfmaps, out_length; + + vector> p_filts; // [feature map index from][feature map index to] + vector> p_fbias; // [feature map index from][feature map index to] +public: + CNN1dLayerBuilder(int in_rows, int k_fold_rows, int filter_width, int in_nfmaps, int out_nfmaps, int out_length); + + virtual void init(dynet::Model &model); + + virtual vector forward(dynet::ComputationGraph &hg, const vector &aClass); + +}; + + +#endif //PROJECT_CNNLAYERBUILDER_H diff --git a/src/srl/common/model/ConstLookupModelBuilder.h b/src/srl/common/model/ConstLookupModelBuilder.h new file mode 100644 index 000000000..e3e69838b --- /dev/null +++ b/src/srl/common/model/ConstLookupModelBuilder.h @@ -0,0 +1,23 @@ +// +// Created by liu on 2017/3/9. +// + +#ifndef PROJECT_CONSTLOOKUPMODELBUILDER_H +#define PROJECT_CONSTLOOKUPMODELBUILDER_H + +#include "LookupModelBuilder.h" +#include + +class ConstLookupModelBuilder: public LookupModelBuilder { +public: + ConstLookupModelBuilder(unsigned inputDim = 0, unsigned expressionDim = 0): + LookupModelBuilder(inputDim, expressionDim) { } + + virtual Expression forward(ComputationGraph &hg, const unsigned & num) { + return dynet::expr::const_lookup(hg, lookupParameter, num); + } + +}; + + +#endif //PROJECT_CONSTLOOKUPMODELBUILDER_H diff --git a/src/srl/common/model/LabelModel.h b/src/srl/common/model/LabelModel.h new file mode 100644 index 000000000..a0ecea851 --- /dev/null +++ b/src/srl/common/model/LabelModel.h @@ -0,0 +1,40 @@ +// +// Created by liu on 2017/5/5. 
+// + +#ifndef PROJECT_LABELMODEL_H +#define PROJECT_LABELMODEL_H + +#include "BaseLabelModel.h" +#include "Const.h" + +using namespace dynet; +using namespace dynet::expr; + +template +class LabelModel : public BaseLabelModel { + ModelConf& config; + base::Debug debug; +public: + float dropout_rate = 0; // 默认关闭dropout + + LabelModel(ModelConf &config) : + BaseLabelModel (config), + config(config), debug("LabelModel") { } + + virtual Expression label(ComputationGraph& hg, SampleClass & samples) = 0; + + virtual Expression ExtractError(ComputationGraph& hg, Expression& adists, SampleClass & samples, Performance &perf) = 0; + /** + * 提取Prediction + * @param hg + * @param adists + * @param answerTable + * @return + */ + virtual Prediction ExtractResults(ComputationGraph& hg, Expression& adists) { + return extractPrediction(as_vector(hg.incremental_forward(adists))); + } +}; + +#endif //PROJECT_LABELMODEL_H diff --git a/src/srl/common/model/LookupModelBuilder.h b/src/srl/common/model/LookupModelBuilder.h new file mode 100644 index 000000000..3b020dd58 --- /dev/null +++ b/src/srl/common/model/LookupModelBuilder.h @@ -0,0 +1,75 @@ +// +// Created by liu on 2017/2/20. 
+// + +#ifndef TTPLAB_LOOKUPMODELBUILDER_H +#define TTPLAB_LOOKUPMODELBUILDER_H + +#include "./ModelBuilder.h" +#include "dynet/nodes.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/timing.h" +#include "dynet/rnn.h" +#include "dynet/gru.h" +#include "dynet/lstm.h" +#include "dynet/dict.h" +#include +#include +#include "vector" + +using namespace dynet; +using namespace dynet::expr; +using namespace std; + +class LookupModelBuilder: public ModelBuilder { +protected: + unsigned inputDim, expressionDim; + LookupParameter lookupParameter; +public: + LookupModelBuilder(unsigned inputDim = 0, unsigned expressionDim = 0): + inputDim(inputDim), + expressionDim(expressionDim) { } + + unsigned getExpressionDim() const { + return expressionDim; + } + + unsigned getInputDim() const { + return inputDim; + } + + void setInputDim(unsigned int inputDim) { + LookupModelBuilder::inputDim = inputDim; + } + + void setExpressionDim(unsigned int expressionDim) { + LookupModelBuilder::expressionDim = expressionDim; + } + + virtual void init(Model & model) { + assert(inputDim > 0); + assert(expressionDim > 0); + lookupParameter = model.add_lookup_parameters(inputDim, {expressionDim}); + } + + virtual Expression forward(ComputationGraph &hg, const unsigned & num) { + return lookup(hg, lookupParameter, num); + } + + virtual vector forwardList(ComputationGraph &hg, const vector & nums) { + vector res; + for (int i = 0; i < nums.size(); i++) { + res.push_back(forward(hg, nums[i])); + } + return res; + } + + void initialize(unsigned index, vector& val) { + lookupParameter.initialize(index, val); + } + +}; + + +#endif //TTPLAB_LOOKUPMODELBUILDER_H diff --git a/src/srl/common/model/MLPModelBuilder.h b/src/srl/common/model/MLPModelBuilder.h new file mode 100644 index 000000000..74b1220e3 --- /dev/null +++ b/src/srl/common/model/MLPModelBuilder.h @@ -0,0 +1,57 @@ +// +// Created by liu on 2017/4/10. 
+// + +#ifndef PROJECT_MLPMODELBUILDER_H +#define PROJECT_MLPMODELBUILDER_H + +#include "./ModelBuilder.h" +#include "dynet/nodes.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/timing.h" +#include "dynet/rnn.h" +#include "dynet/gru.h" +#include "dynet/lstm.h" +#include "dynet/dict.h" +#include +#include +#include "vector" + +using namespace dynet; +using namespace dynet::expr; +using namespace std; + +class MLPModelBuilder : public ModelBuilder { + vector layerDims; + unsigned outDim; + vector bias; + vector mulParams; +public: + MLPModelBuilder(vector layerDims, unsigned outDim) : layerDims(layerDims), outDim(outDim) + { + } + + virtual void init(dynet::Model &model) { + assert(layerDims.size() > 0); + assert(outDim > 0); + for (int i = 0; i < layerDims.size() - 1; ++i) { + mulParams.push_back(model.add_parameters({layerDims[i + 1], layerDims[i]})); + bias.push_back(model.add_parameters({layerDims[i + 1]})); + } + mulParams.push_back(model.add_parameters({outDim, layerDims[layerDims.size() - 1]})); + bias.push_back(model.add_parameters({outDim})); + } + + virtual Expression forward(dynet::ComputationGraph &hg, const Expression& features) { + Expression hiddenLayer = features; + for (int i = 0; i < mulParams.size(); ++i) { + hiddenLayer = (parameter(hg, mulParams[i]) * hiddenLayer + parameter(hg, bias[i])); + hiddenLayer = dynet::expr::logistic(hiddenLayer); + } + return hiddenLayer; + } + +}; + +#endif //PROJECT_MLPMODELBUILDER_H diff --git a/src/srl/common/model/ModelBuilder.h b/src/srl/common/model/ModelBuilder.h new file mode 100644 index 000000000..4c3284dcb --- /dev/null +++ b/src/srl/common/model/ModelBuilder.h @@ -0,0 +1,46 @@ +// +// Created by liu on 2017/2/20. 
+// + +#ifndef TTPLAB_MODELBUILDER_H +#define TTPLAB_MODELBUILDER_H + +#include + +/** + * Builder + * 此类和派生类的作用是辅助model类搭建复杂模型,实际上是一个参数分组类的接口类。 + * + * 一个builder应该包含 + * - 从某种表达式(或数字)到目标表达式的推导方法。 + * - 并且包含运算过程相关参数。 + * + * 一个builder可以被其他builder组合包含 + * + * 每个builder必须实现此类接口 + */ +template +class ModelBuilder { +public: + dynet::Model * model; + ModelBuilder() { + + } + + virtual void init(dynet::Model & model) { + this->model = & model; + } + + + virtual OutputClass forward(dynet::ComputationGraph &hg, const InputClass &) { + return OutputClass(); + }; + + virtual OutputClass forward(dynet::ComputationGraph &hg, const InputClass && in) { + return forward(hg, in); + } + +}; + + +#endif //TTPLAB_MODELBUILDER_H diff --git a/src/srl/common/model/PiSrlModel.h b/src/srl/common/model/PiSrlModel.h new file mode 100644 index 000000000..8ae2bb595 --- /dev/null +++ b/src/srl/common/model/PiSrlModel.h @@ -0,0 +1,54 @@ +// +// Created by liu on 2017/5/12. +// + +#ifndef BILSTM_SRL_PISRLMODEL_H +#define BILSTM_SRL_PISRLMODEL_H + +#include +#include +#include "../structure/SrlPiSample.h" +#include "Const.h" + +// model builders +#include +#include +#include + +class PiSrlModel : public SeqLabelModel { + + + +public: + enum Look { WORD = 0, POS, REL, POSITION, ARG, ALL }; + + PiSrlModel(ModelConf &config) : SeqLabelModel(config) + { } + + void registerDict(vector& samples) { + dict.resize(ALL); + dict[WORD].convert(ROOT_MARK); + dict[POS].convert(ROOT_MARK); + dict[REL].convert(ROOT_MARK); + for (int j = 0; j < samples.size(); ++j) { + for (int k = 0; k < samples[j].size(); ++k) { + dict[WORD].convert(samples[j].getWord(k).getWord()); + dict[POS].convert(samples[j].getWord(k).getPos()); + dict[REL].convert(samples[j].getWord(k).getRel()); + dict[POSITION].convert(samples[j].getWord(k).getPosition()); + vector& args = samples[j].getWord(k).getArgs(); + for (auto i = args.begin(); i != args.end(); i++) { + dict[ARG].convert(*i); + } + } + } + + freezeDict(); + 
dict[WORD].set_unk(UNK_WORD); + dict[POS].set_unk(UNK_WORD); + dict[REL].set_unk(UNK_WORD); + } +}; + + +#endif //BILSTM_SRL_PISRLMODEL_H diff --git a/src/srl/common/model/RNNModelBuilder.h b/src/srl/common/model/RNNModelBuilder.h new file mode 100644 index 000000000..b530d2633 --- /dev/null +++ b/src/srl/common/model/RNNModelBuilder.h @@ -0,0 +1,215 @@ +// +// Created by liu on 2017/5/22. +// + +#ifndef BILSTM_SRL_RNNMODELBUILDER_H +#define BILSTM_SRL_RNNMODELBUILDER_H + +#include "./ModelBuilder.h" +#include "dynet/nodes.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/timing.h" +#include "dynet/rnn.h" +#include "dynet/gru.h" +#include "dynet/lstm.h" +#include "dynet/dict.h" +#include +#include +#include "vector" + +using namespace dynet; +using namespace dynet::expr; +using namespace std; + +template class RNNModelBuilder; +typedef RNNModelBuilder LSTMModelBuilder; +typedef RNNModelBuilder GRUModelBuilder; +typedef RNNModelBuilder SimpleRNNModelBuilder; + +template +class RNNModelBuilder : public ModelBuilder, vector>{ + unsigned layers; + unsigned inputDim; + unsigned outputDim; + DynetRnnBuilder dynetRnnBuilder; + Parameter begin; + Parameter end; +public: + RNNModelBuilder(unsigned layers, unsigned inputDim, unsigned outputDim) : + layers(layers), + inputDim(inputDim), + outputDim(outputDim) + {} + + void setLayers(unsigned int _layers) { layers = _layers; } + + void setInputDim(unsigned int _inputDim) { inputDim = _inputDim; } + + void setOutputDim(unsigned int _outputDim) { outputDim = _outputDim; } + + using ModelBuilder::init; + virtual void init(Model &model, bool initBeginEnd = true) { + dynetRnnBuilder = DynetRnnBuilder(layers, inputDim, outputDim, model); + if (initBeginEnd) { + begin = model.add_parameters({inputDim}); + end = model.add_parameters({inputDim}); + } + } + + virtual void init(Model &model, Parameter& begin, Parameter& end) { + this->begin = begin; + this->end = end; + init(model, false); + } + + void 
newGraph(ComputationGraph &cg) { dynetRnnBuilder.new_graph(cg); } + void startNewSequence(vector h_0 = {}) { dynetRnnBuilder.start_new_sequence(); } + + void dropOut(float d) { if (d > 1e-6) { dynetRnnBuilder.set_dropout(d); } else { dynetRnnBuilder.disable_dropout(); } } + + void disableDropOut() { dynetRnnBuilder.disable_dropout(); } + + /** + * + * @param hg + * @param inputList + * @return + * + * begin -> [1 -> 2 -> ... -> last] -> end + * | | |...| | + * V V V V V + * [1 -> 2 -> ... -> last] return this out put + */ + virtual vector forward(dynet::ComputationGraph &hg, const vector& inputList) { + vector res; + dynetRnnBuilder.add_input(parameter(hg, begin)); + for (int i = 0; i < inputList.size(); ++i) { + res.push_back(dynetRnnBuilder.add_input(inputList[i])); + } + dynetRnnBuilder.add_input(parameter(hg, end)); + return res; + } + + /** + * this is used for backward lstm in BiLSTM + * note : use forward or use backward only! Use both is logically wrong! + * @param hg + * @param inputList + * @return + * + * begin <- [1 <- 2 <- ... <- last] <- end + * | | |...| | + * V V V V V + * [1 -> 2 -> ... -> last] return this out put + */ + + virtual vector backward(dynet::ComputationGraph &hg, const vector& inputList) { + vector res(inputList.size()); + dynetRnnBuilder.add_input(parameter(hg, end)); + for (int i = inputList.size() - 1; i >= 0; --i) { + res[i] = dynetRnnBuilder.add_input(inputList[i]); + } + dynetRnnBuilder.add_input(parameter(hg, begin)); + return res; + } + + /** + * + * @param hg + * @param inputList + * @return + * + * begin -> [1 -> 2 -> ... 
-> last] -> end + * | + * V + * EXP return this out put + */ + virtual Expression forwardBack(dynet::ComputationGraph &hg, vector& inputList) { + dynetRnnBuilder.add_input(parameter(hg, begin)); + for (int i = 0; i < inputList.size(); ++i) { + dynetRnnBuilder.add_input(inputList[i]); + } + dynetRnnBuilder.add_input(parameter(hg, end)); + return dynetRnnBuilder.back(); + } + + /** + * + * @param hg + * @param inputList + * @return + * + * begin <- [1 <- 2 <- ... <- last] <- end + * | + * V + * EXP return this out put + */ + virtual Expression backwardBack(dynet::ComputationGraph &hg, vector& inputList) { + dynetRnnBuilder.add_input(parameter(hg, end)); + for (int i = inputList.size() - 1; i >= 0; --i) { + dynetRnnBuilder.add_input(inputList[i]); + } + dynetRnnBuilder.add_input(parameter(hg, begin)); + return dynetRnnBuilder.back(); + } + + /** + * + * @param hg + * @param inputList + * @param order + * @return + * + * begin -> [1 -> 2 -> ... -> last] -> end + * | + * V + * EXP return this out put + */ + virtual Expression forwardByOrder(dynet::ComputationGraph &hg, vector& inputList, vector& order) { + dynetRnnBuilder.add_input(parameter(hg, begin)); + for (int i = 0; i < order.size(); ++i) { + dynetRnnBuilder.add_input(inputList[order[i]]); + } + return dynetRnnBuilder.back(); + } + + virtual Expression forwardByOrder(dynet::ComputationGraph &hg, vector& inputList, vector& order, Expression& escape) { + dynetRnnBuilder.add_input(parameter(hg, begin)); + for (int i = 0; i < order.size(); ++i) { + dynetRnnBuilder.add_input(order[i] > 0 ? inputList[order[i]] : escape); + } + return dynetRnnBuilder.back(); + } + /** + * + * @param hg + * @param inputList + * @param order + * @return + * + * begin -> [1 -> 2 -> ... 
-> last] -> end + * | + * V + * EXP return this out put + */ + virtual Expression forwardBackByOrder(dynet::ComputationGraph &hg, vector& inputList, vector& order) { + dynetRnnBuilder.add_input(parameter(hg, begin)); + for (int i = 0; i < order.size(); ++i) { + dynetRnnBuilder.add_input(inputList[order[i]]); + } + dynetRnnBuilder.add_input(parameter(hg, end)); + return dynetRnnBuilder.back(); + } + virtual Expression forwardBackByOrder(dynet::ComputationGraph &hg, vector& inputList, vector& order, Expression& escape) { + dynetRnnBuilder.add_input(parameter(hg, begin)); + for (int i = 0; i < order.size(); ++i) { + dynetRnnBuilder.add_input(order[i] > 0 ? inputList[order[i]] : escape); + } + dynetRnnBuilder.add_input(parameter(hg, end)); + return dynetRnnBuilder.back(); + } + +}; + +#endif //BILSTM_SRL_RNNMODELBUILDER_H diff --git a/src/srl/common/model/SeqLabelModel.h b/src/srl/common/model/SeqLabelModel.h new file mode 100644 index 000000000..41f160ebf --- /dev/null +++ b/src/srl/common/model/SeqLabelModel.h @@ -0,0 +1,44 @@ +// +// Created by liu on 2017/2/22. 
+// + +#ifndef PROJECT_BILSTMBASEMODEL_H +#define PROJECT_BILSTMBASEMODEL_H + +#include +#include +#include +#include +#include "structure/Performance.h" +#include "structure/Prediction.h" +#include "config/ModelConf.h" +#include "Const.h" +#include "./BaseLabelModel.h" + +#include +#include + +using namespace dynet; +using namespace dynet::expr; + +template +class SeqLabelModel : public BaseLabelModel { + ModelConf& config; + base::Debug debug; +public: + float dropout_rate = 0; // 默认关闭dropout + + SeqLabelModel(ModelConf &config) : + BaseLabelModel(config), + config(config), debug("SeqLabelModel") { } + + virtual vector label(ComputationGraph& hg, SampleClass & samples) = 0; + + virtual Expression ExtractError(ComputationGraph& hg, vector& adists, SampleClass & samples, Performance &perf) = 0; + + virtual void ExtractResults(ComputationGraph &hg, vector &adists, SampleClass &samples) = 0; + +}; + + +#endif //PROJECT_BILSTMBASEMODEL_H diff --git a/src/srl/common/process/DynetPredictor.h b/src/srl/common/process/DynetPredictor.h new file mode 100644 index 000000000..3eff4be19 --- /dev/null +++ b/src/srl/common/process/DynetPredictor.h @@ -0,0 +1,31 @@ +// +// Created by liu on 2017/5/5. 
+// + +#ifndef PROJECT_DYNETPREDICTOR_H +#define PROJECT_DYNETPREDICTOR_H + +#include +#include "process/ModelPredictor.h" +#include "config/ModelConf.h" + +template +class DynetPredictor : public model::ModelPredictor { + DynetConf& config; + +public: + DynetPredictor(PredConfigClass & config) : + model::ModelPredictor(config), + config(config) + { } + + void initDynet() { + dynet::DynetParams dynetParams; + dynetParams.mem_descriptor = config.dynet_mem; + dynetParams.random_seed = config.dynet_seed; + dynetParams.requested_gpus = config.dynet_gpus; + dynet::initialize(dynetParams); + } +}; + +#endif //PROJECT_DYNETPREDICTOR_H diff --git a/src/srl/common/process/DynetTrainer.h b/src/srl/common/process/DynetTrainer.h new file mode 100644 index 000000000..522828d67 --- /dev/null +++ b/src/srl/common/process/DynetTrainer.h @@ -0,0 +1,39 @@ +// +// Created by liu on 2017/5/5. +// + +#ifndef PROJECT_DYNETTRAINER_H +#define PROJECT_DYNETTRAINER_H + +#include "process/ModelTrainer.h" +#include "config/ModelConf.h" +template +class DynetTrainer : public model::ModelTrainer { + DynetConf& config; + +public: + DynetTrainer(TrainConfigClass & config) : + model::ModelTrainer(config), + config(config) + { } + + void initDynet() { + dynet::DynetParams dynetParams; + dynetParams.mem_descriptor = config.dynet_mem; + dynetParams.random_seed = config.dynet_seed; + dynetParams.requested_gpus = config.dynet_gpus; + dynet::initialize(dynetParams); + } + // 工具函数 +protected: + + string statusOfSgd(SimpleSGDTrainer & sgd) { + char s[64]; + sprintf(s, "[epoch=%.2f eta=%.2e clips=%.1f updates=%.0f]", sgd.epoch, sgd.eta, sgd.clips, sgd.updates); + sgd.updates = sgd.clips = 0; + return string(s); + } +}; + + +#endif //PROJECT_DYNETTRAINER_H diff --git a/src/srl/common/process/LabelModelSGDSeqPredictor.h b/src/srl/common/process/LabelModelSGDSeqPredictor.h new file mode 100644 index 000000000..5bf8c2656 --- /dev/null +++ b/src/srl/common/process/LabelModelSGDSeqPredictor.h @@ -0,0 +1,54 @@ 
+// +// Created by liu on 2017/2/24. +// + +#ifndef PROJECT_LABELMODELSGDPREDICTOR_H +#define PROJECT_LABELMODELSGDPREDICTOR_H + +#include +#include "process/DynetPredictor.h" +#include "model/SeqLabelModel.h" +#include "config/ModelConf.h" +#include "base/timer.h" + +template +class LabelModelSGDSeqPredictor : public DynetPredictor{ +public: + LabelModelPredictorConf &config; + vector testSamples; + SeqLabelModel & labelModel; + base::Debug debug; + + /** + * @param config + * @param labelModel 这是向父类传递的模型句柄,至少要实现BaseLabelModel定义的接口 + */ + LabelModelSGDSeqPredictor(PredConfigClass &config, SeqLabelModel & labelModel) : + DynetPredictor(config), config(config), + labelModel(labelModel), + debug(getClassName()) + { + DynetPredictor::initDynet(); + } + + virtual void predict() { + debug.debug("prediction start."); + base::ProgressBar bar((int)testSamples.size(), 25); + base::Timer timer; + for (int j = 0; j < testSamples.size(); ++j) { + ComputationGraph hg; + vector adists = labelModel.label(hg, testSamples[j]); + labelModel.ExtractResults(hg, adists, testSamples[j]); + if (bar.updateLength(j + 1)) + debug.info("(%d)%s is predicted", j + 1, bar.getProgress(j + 1).c_str()); + } + debug.debug(" predict %d in %s", (int)testSamples.size(), timer.end().c_str()); + } + + static string getClassName() { + return "LabelModelSGDPredictor"; + } +}; + + +#endif //PROJECT_LABELMODELSGDPREDICTOR_H diff --git a/src/srl/common/process/LabelModelSGDSeqTrainer.h b/src/srl/common/process/LabelModelSGDSeqTrainer.h new file mode 100644 index 000000000..90e7bad5b --- /dev/null +++ b/src/srl/common/process/LabelModelSGDSeqTrainer.h @@ -0,0 +1,145 @@ +// +// Created by liu on 2017/2/22. 
+// + +#ifndef PROJECT_SGDSeqTRAINER_H +#define PROJECT_SGDSeqTRAINER_H + +#include +#include +#include "model/SeqLabelModel.h" +#include "base/debug.h" +#include "config/ModelConf.h" +#include "./DynetTrainer.h" +#include "base/timer.h" +#include "model/LoopCounter.h" +#include "model/RandomOrderMap.h" +#include "process/ConditionStopper.h" +#include "process/TrainStats.h" + +using namespace dynet; + +template +class LabelModelSGDSeqTrainer : public DynetTrainer { + LabelModelTrainerConf &config; + base::Debug debug; +public: + vector trainSamples, devSamples; + SimpleSGDTrainer * sgd; + Performance bestDevPerf; + SeqLabelModel & labelModel; + bool useDropOut = false; + + LabelModelSGDSeqTrainer(TrainConfigClass &config, SeqLabelModel & labelModel) : + DynetTrainer(config), config(config), debug(getClassName()), + labelModel(labelModel) { + DynetTrainer::initDynet(); + resetDropOut(); + } + + virtual void train () { + debug.debug("Training start"); + + + sgd = new SimpleSGDTrainer(labelModel.model, config.et0, config.eta_decay); + + unsigned trainSetSize = (unsigned)trainSamples.size(); + unsigned batchSize = min(config.batch_size, trainSetSize); + + checkDev(); + model::LoopCounter trainSetInnerIter(trainSetSize); + process::ConditionStopper conditionStopper; + TrainStats trainStats; + model::RandomOrderMap order((unsigned)trainSamples.size()); + int turn_iter; int lastSaveTurn = 0; + for (turn_iter = 1; turn_iter <= config.max_iter; turn_iter++) { + // 迭代一个batch + trainStats.newBatch(); + Performance perf; + for (unsigned batchInnerIter = 0; batchInnerIter < batchSize; batchInnerIter++) { + SimpleClass &samples = trainSamples[order++]; + double err = trainOneSampleGroup(samples, perf); + if (err < 0.0) labelModel.load(); + trainStats.updateSample(err, samples.size()); + } + debug.info("%s %s %s", + DynetTrainer::statusOfSgd(*sgd).c_str(), + trainStats.getBatchStats().c_str(), + perf.toString().c_str()); + + sgd->update_epoch((float)batchSize/trainSetSize); + + 
// check dev set to save + if (turn_iter % config.batches_to_save == 0 && checkDev()) { + labelModel.save(); lastSaveTurn = 0; + } else { + lastSaveTurn ++; + } + + // auto stop training + if (config.use_auto_stop && conditionStopper.auto_end(perf, bestDevPerf, turn_iter, sgd->epoch, lastSaveTurn)) { + debug.debug("auto finish training."); + break; + } + } + trainStats.printTrainEndStats(); + } + + virtual bool checkDev() { + Performance dev_perf; + unsigned dev_size = (unsigned) devSamples.size(); + disableDropOut(); + base::Timer t; + for (int j = 0; j < dev_size; ++j) { + ComputationGraph hg; + vector results = labelModel.label(hg, devSamples[j]); + labelModel.ExtractError(hg, results, devSamples[j], dev_perf); + } + debug.debug(" **dev %s (best f=%lf) [%u samples in %s]", dev_perf.toString().c_str(), bestDevPerf.fscore(), dev_size, t.end().c_str()); + resetDropOut(); + if (dev_perf.fscore() > (bestDevPerf.fscore() + config.best_perf_sensitive)) { + bestDevPerf = dev_perf; + return true; + } else if (dev_perf.fscore() > bestDevPerf.fscore()){ + debug.debug(" this test dev f:%lf is no larger than best %lf %f. The small upgrade will be ignored.", + dev_perf.fscore(), bestDevPerf.fscore(), config.best_perf_sensitive); + } + return false; + } + + void resetDropOut() { + labelModel.setDropOut(config.use_dropout ? 
config.dropout_rate : 0); + } + + void disableDropOut() { + labelModel.setDropOut(0); + } + +protected: + + virtual double trainOneSampleGroup(SimpleClass & sampleGroup, Performance & perf) { + ComputationGraph hg; + vector results = labelModel.label(hg, sampleGroup); + Expression err = labelModel.ExtractError(hg, results, sampleGroup, perf); + double lp = as_scalar(hg.incremental_forward(err)); + if (lp >= 0.0) { + // could feedback err + hg.backward(err); + sgd->update(1.0); + return lp; + } else { + // err=nan leaning_rate -= decay + sgd->update_epoch(0.1); + debug.warning(" got NAN err, sgd reset : %s", DynetTrainer::statusOfSgd(*sgd).c_str()); + return -1; // for reload model + } + } + + static string getClassName() { + return "LabelModelSGDSeqTrainer"; + } + +}; + + +#endif //PROJECT_SGDTRAINER_H diff --git a/src/srl/common/process/LabelModelSGDTrainer.h b/src/srl/common/process/LabelModelSGDTrainer.h new file mode 100644 index 000000000..119d3c112 --- /dev/null +++ b/src/srl/common/process/LabelModelSGDTrainer.h @@ -0,0 +1,137 @@ +// +// Created by liu on 2017/2/22. 
+// + +#ifndef PROJECT_SGDTRAINER_H +#define PROJECT_SGDTRAINER_H + +#include "process/DynetTrainer.h" +#include "model/BaseLabelModel.h" +#include "base/debug.h" +#include "config/ModelConf.h" +#include "base/timer.h" +#include "dynet/training.h" +#include "model/LoopCounter.h" +#include "model/RandomOrderMap.h" + + +template +class LabelModelSGDTrainer : public DynetTrainer { +public: + LabelModelTrainerConf &config; + vector trainSamples, devSamples; + dynet::SimpleSGDTrainer * sgd; + BaseLabelModel & labelModel; + Performance bestDevPerf; + base::Debug debug; + + bool useDropOut = false; + bool enableFirstDevCheck = true; + + LabelModelSGDTrainer(TrainConfigClass &config, BaseLabelModel & labelModel) : + DynetTrainer(config), config(config), + labelModel(labelModel), + debug(getClassName()){ + DynetTrainer::initDynet(); + resetDropOut(); + } + + virtual void train () { + sgd = new SimpleSGDTrainer(labelModel.model, config.et0, config.eta_decay); + debug.debug("Training start"); + base::Timer t; + + unsigned trainSetSize = (unsigned)trainSamples.size(); + unsigned batchSize = min(config.batch_size, trainSetSize); + + model::LoopCounter trainSetInnerIter(trainSetSize); + double totalSeenSampleNum = 0; + if (enableFirstDevCheck) checkDev(); + model::RandomOrderMap order((unsigned)trainSamples.size()); + + for (int turn_iter = 1; turn_iter <= config.max_iter; turn_iter++) { + // 迭代一个batch + Performance perf; // 每个batch 累计统计 + double llh = 0; // 每个batch 的累计lost + double batchSampleNum = 0; // batch 中词总数 + for (unsigned batchInnerIter = 0; batchInnerIter < batchSize; batchInnerIter++) { + SimpleClass &samples = trainSamples[order++]; + batchSampleNum ++; + double err = trainOneSampleGroup(samples, perf); + if (err < 0.0) labelModel.load(); + llh += err; + totalSeenSampleNum += 1; + } + debug.info("%s update #%d \terr:%.2lf e/b:%lf %s", + statusOfSgd(*sgd).c_str(), turn_iter, + totalSeenSampleNum/trainSetSize, llh, (llh/batchSampleNum), perf.toString().c_str()); + 
sgd->update_epoch((float)batchSize/trainSetSize); + + if (turn_iter % config.batches_to_save == 0) { + if (checkDev()) { + labelModel.save(); + } + } + } + debug.debug("Training end. Total using %s, iter %u batch, %.0ld samples", + t.end().c_str(), config.max_iter, totalSeenSampleNum); + } + + virtual bool checkDev() { + Performance dev_perf; + unsigned dev_size = (unsigned) devSamples.size(); + disableDropOut(); + base::Timer t; + for (int j = 0; j < dev_size; ++j) { + ComputationGraph hg; + Expression results = labelModel.label(hg, devSamples[j]); + labelModel.ExtractError(hg, results, devSamples[j], dev_perf); + } + debug.debug(" **dev %s (best f=%lf) [%u samples in %s]", dev_perf.toString().c_str(), bestDevPerf.fscore(), dev_size, t.end().c_str()); + resetDropOut(); + if (dev_perf.fscore() > (bestDevPerf.fscore() + config.best_perf_sensitive)) { + bestDevPerf = dev_perf; + return true; + } else if (dev_perf.fscore() > bestDevPerf.fscore()){ + debug.debug(" this test dev f:%lf is no larger than best %lf %f. The small upgrade will be ignored.", + dev_perf.fscore(), bestDevPerf.fscore(), config.best_perf_sensitive); + } + return false; + } + + void resetDropOut() { + labelModel.setDropOut(config.use_dropout ? 
config.dropout_rate : 0); + } + + void disableDropOut() { + labelModel.setDropOut(0); + } + +protected: + + virtual double trainOneSampleGroup(SimpleClass & sampleGroup, Performance & perf) { + ComputationGraph hg; + Expression results = labelModel.label(hg, sampleGroup); + Expression err = labelModel.ExtractError(hg, results, sampleGroup, perf); + double lp = as_scalar(hg.incremental_forward(err)); + if (lp >= 0.0) { + // could feedback err + hg.backward(err); + sgd->update(1.0); + return lp; + } else { + // err=nan leaning_rate -= decay + sgd->update_epoch(0.1); + debug.warning(" got NAN err, sgd reset : %s", statusOfSgd(*sgd).c_str()); + return -1; // for reload model + } + } + + static string getClassName() { + return "LabelModelSGDTrainer"; + } + +}; + + +#endif //PROJECT_SGDTRAINER_H diff --git a/src/srl/common/process/TrainStats.h b/src/srl/common/process/TrainStats.h new file mode 100644 index 000000000..e3bb15c5c --- /dev/null +++ b/src/srl/common/process/TrainStats.h @@ -0,0 +1,58 @@ +// +// Created by liu on 2017/5/11. +// + +#ifndef PROJECT_TRAINSTATS_H +#define PROJECT_TRAINSTATS_H + +#include "base/debug.h" +#include "base/timer.h" + +class TrainStats { + base::Debug debug; + // global + double total_seen_sample_num = 0, trained_batches = 0; + base::Timer t; + // in batch + double batch_err = 0; int batch_simple_size = 0; +public: + TrainStats() : debug(TrainStats::getClassName()) + { + t.start(); + } + + void updateSample(double err, int sampleSize = 1) { + batch_err += err; + batch_simple_size = sampleSize; + total_seen_sample_num++; + } + + void newBatch() { + // init batch vals + batch_err = 0; + batch_simple_size = 0; + trained_batches++; + } + + string getBatchStats() { + char s[128]; + sprintf(s, "#%.0lf err:%.2lf e/b:%.2lf", + trained_batches, + batch_err, + batch_err / batch_simple_size); + return string(s); + } + + void printTrainEndStats() { + debug.debug("Training end. 
Total using %s, iter %.0lf batch, %.0lf samples", + t.end().c_str(), trained_batches, total_seen_sample_num); + } + + static string getClassName() { + return "TrainStats"; + } + +}; + + +#endif //PROJECT_TRAINSTATS_H diff --git a/src/srl/common/structure/SrlPiSample.cpp b/src/srl/common/structure/SrlPiSample.cpp new file mode 100644 index 000000000..28753af49 --- /dev/null +++ b/src/srl/common/structure/SrlPiSample.cpp @@ -0,0 +1,9 @@ +// +// Created by liu on 2017/5/18. +// + +#include "SrlPiSample.h" +#include "Const.h" + +Word SrlPiSample::root = Word(0, ROOT_MARK, ROOT_MARK, -1, ROOT_MARK, "before", NIL_LABEL); + diff --git a/src/srl/common/structure/SrlPiSample.h b/src/srl/common/structure/SrlPiSample.h new file mode 100644 index 000000000..a607898df --- /dev/null +++ b/src/srl/common/structure/SrlPiSample.h @@ -0,0 +1,46 @@ +// +// Created by liu on 2017-05-12. +// + +#ifndef PROJECT_SrlPiSAMPLE_H +#define PROJECT_SrlPiSAMPLE_H + +#include "vector" +#include "Word.h" +#include "structure/DataConcept.h" +using namespace std; + +class SrlPiSample : public extractor::DataConcept { + vector data; + static Word root; +public: + unsigned size() { + return data.size(); + } + + vector getPredicateList() { + vector ans; + for (int j = 0; j < data.size(); ++j) { + if (data[j].isPredicate()) { + ans.push_back(data[j].getInnerIndex()); + } + } + return ans; + } + + Word & getWord(int index) { + if (index == -1) return root; + return data[index]; + } + + void push_back(const Word & w) { + data.push_back(w); + } + + static string getClassName() { + return "PiSample"; + } +}; + + +#endif //PROJECT_PiSAMPLE_H diff --git a/src/srl/common/structure/Word.h b/src/srl/common/structure/Word.h new file mode 100644 index 000000000..9b4846232 --- /dev/null +++ b/src/srl/common/structure/Word.h @@ -0,0 +1,93 @@ +// +// Created by liu on 2017/5/12. 
+// + +#ifndef BILSTM_SRL_WORD_H +#define BILSTM_SRL_WORD_H + +#include "iostream" +#include "vector" +#include "Const.h" +#include "Const.h" +using namespace std; + +class Word { + int innerIndex; + string word; + string pos; + + int parent; + string rel; + string position; + + string predicate; + vector args; +public: + Word(int innerIndex, + const string &word, + const string &pos, + int parent, + const string &rel, + const string position, + const string &predicate, + const vector> &args + ) : innerIndex(innerIndex), word(word), pos(pos), parent(parent), rel(rel), + position(position), predicate(predicate), args(args) {} + + Word(int innerIndex, + const string &word, + const string &pos, + int parent, + const string &rel, + const string position, + const string &predicate + ) : innerIndex(innerIndex), word(word), pos(pos), parent(parent), rel(rel), + position(position), predicate(predicate) {} + + int getInnerIndex() const { + return innerIndex; + } + + const string &getWord() const { + return word; + } + + const string &getPos() const { + return pos; + } + + const string &getRel() const { + return rel; + } + + const string &getPosition() const { + return position; + } + + int getParent() const { + return parent; + } + + const string &getPredicate() const { + return predicate; + } + + void setPredicate(bool isPred) { + predicate = isPred ? PRED_LABEL : NIL_LABEL; + } + + bool isPredicate() const { + return predicate == PRED_LABEL; + } + + vector& getArgs() { + return args; + } + + void setArgs(const vector &args) { + Word::args = args; + } +}; + + +#endif //BILSTM_SRL_WORD_H diff --git a/src/srl/common/structure/WordEmbBuilder.h b/src/srl/common/structure/WordEmbBuilder.h new file mode 100644 index 000000000..0c2e0592b --- /dev/null +++ b/src/srl/common/structure/WordEmbBuilder.h @@ -0,0 +1,61 @@ +// +// Created by liu on 2017/1/4. 
+// + +#ifndef PROJECT_DATAWORDEMB_H +#define PROJECT_DATAWORDEMB_H + +#include +#include "string" +#include "iostream" +#include +#include +using namespace std; +using namespace extractor; + +class WordEmbBuilder { + unordered_map> * emb = NULL; + bool emb_holding_flag = false; + unsigned long emb_size = 0; + vector zero_emb; + + // 禁止拷贝 + WordEmbBuilder &operator=(const WordEmbBuilder &); +public: + WordEmbBuilder() {} + WordEmbBuilder(unordered_map> & emb) { setEmb(emb); } + WordEmbBuilder(const string& filename) { loadEmb(filename); } + ~WordEmbBuilder() { + if (emb_holding_flag) delete emb; + } + + void setEmb(unordered_map> & emb) { + assert(WordEmbBuilder::emb == NULL); + WordEmbBuilder::emb = &emb; + emb_size = (int) emb.begin()->second.size(); + zero_emb = vector(emb_size, 0); + } + void loadEmb(const string& filename) { + assert(emb == NULL); + ExtractorFileToWordEmb reader; + reader.init(filename); + emb = new unordered_map>(reader.run()); + emb_holding_flag = true; + emb_size = emb->begin()->second.size(); + zero_emb = vector(emb_size, 0); + } + + const vector& getEmb(const string &key) const { + assert(emb != NULL); + if (emb->find(key) != emb->end()) { + return (*emb)[key]; + } else { + return zero_emb; + } + } + + +}; + + +#endif //PROJECT_DATAWORDEMB_H diff --git a/src/srl/lgsrl.cpp b/src/srl/lgsrl.cpp deleted file mode 100644 index 00e7a0e81..000000000 --- a/src/srl/lgsrl.cpp +++ /dev/null @@ -1,746 +0,0 @@ -/** - * Training and testing suite for Semantic Role Labeling - * - * Feature: - * -> Train PRG model (predicate recognition) - * -> Train SRL model (semantic role labeling) - * -> Test PRG+SRL (pipeline) - * - * Author: jiangfeng - * Date : 2013.8.23 - * - */ - - -#include -#include -#include - -#include "Corpus.h" -#include "Configuration.h" -#include "Sentence.h" -#include "FeatureExtractor.h" -#include "GetInstance.h" -#include "maxent.h" -#include "options.h" -#include "cfgparser.hpp" -#include "logging.hpp" -#include "strutils.hpp" 
-#include "SRL_DLL.h" - -using namespace std; -using namespace ltp::utility; -using namespace ltp::strutils; -using namespace maxent; - -TrainOptions train_opt; -TestOptions test_opt; - -ME_Parameter me_prg_param; -ME_Parameter me_srl_param; - -bool __TRAIN_PRG__ = false; -bool __TRAIN_SRL__ = false; -bool __TEST__ = false; - -void usage(void) { - cerr << "srltrain - Training suite for semantic role labeling" << endl; - cerr << "Copyright (C) 2012-2014 HIT-SCIR" << endl; - cerr << endl; - cerr << "usage: ./srltrain " << endl; - cerr << endl; -} - -bool parse_cfg(ConfigParser & cfg) -{ - string strbuf; - int intbuf; - double dblbuf; - - if (cfg.has_section("train-srl")) { - TRACE_LOG("SRL training mode specified"); - - __TRAIN_SRL__ = true; - - if (cfg.get("train-srl", "srl-train-file", strbuf)) { - train_opt.srl_train_file = strbuf; - } else { - ERROR_LOG("srl-train-file config item is not found"); - return false; - } - - if (cfg.get("train-srl", "core-config", strbuf)) { - train_opt.core_config = strbuf; - } else { - ERROR_LOG("core-config config item is not found"); - return false; - } - - if (cfg.get("train-srl", "srl-config", strbuf)) { - train_opt.srl_config = strbuf; - } else { - ERROR_LOG("srl-config config item is not found"); - return false; - } - - if (cfg.get("train-srl", "srl-feature-dir", strbuf)) { - train_opt.srl_feature_dir = strbuf; - } else { - ERROR_LOG("[SRL] srl-feature-dir config item is not found"); - return false; - } - - if (cfg.get("train-srl", "srl-instance-file", strbuf)) { - train_opt.srl_instance_file = strbuf; - } else { - ERROR_LOG("[SRL] srl-instance-file config item is not found"); - return false; - } - - if (cfg.get("train-srl", "srl-model-file", strbuf)) { - train_opt.srl_model_file = strbuf; - } else { - ERROR_LOG("[SRL] srl-model-file config item is not found"); - return false; - } - - if (cfg.get("train-srl", "dst-config-dir", strbuf)) { - train_opt.dst_config_dir = strbuf; - } else { - ERROR_LOG("[SRL] dst_config_dir config 
item is not found"); - return false; - } - - if (cfg.get_integer("train-srl", "solver-type", intbuf)) { - switch (intbuf) { - case 0: me_srl_param.solver_type = L1_OWLQN; break; - case 1: me_srl_param.solver_type = L1_SGD; break; - case 2: me_srl_param.solver_type = L2_LBFGS; break; - default: - ERROR_LOG("Unsupported solver [%d]", intbuf); - break; - } - } - - if (cfg.get_float("train-srl", "l1-reg", dblbuf)) { - me_srl_param.l1_reg = dblbuf; - } - - if (cfg.get_float("train-srl", "l2-reg", dblbuf)) { - me_srl_param.l2_reg = dblbuf; - } - - if (cfg.get_integer("train-srl", "sgd-iter", intbuf)) { - me_srl_param.sgd_iter = intbuf; - } - - if (cfg.get_float("train-srl", "sgd-eta0", dblbuf)) { - me_srl_param.sgd_eta0 = dblbuf; - } - - if (cfg.get_float("train-srl", "sgd-alpha", dblbuf)) { - me_srl_param.sgd_alpha = dblbuf; - } - - if (cfg.get_integer("train-srl", "nheldout", intbuf)) { - me_srl_param.nheldout = intbuf; - } - } - - if (cfg.has_section("train-prg")) { - TRACE_LOG("PRG training model specified"); - - __TRAIN_PRG__ = true; - - if (cfg.get("train-prg", "prg-train-file", strbuf)) { - train_opt.prg_train_file = strbuf; - } else { - ERROR_LOG("prg-train-file config item is not found"); - return false; - } - - if (cfg.get("train-prg", "core-config", strbuf)) { - train_opt.core_config = strbuf; - } else { - ERROR_LOG("core-config config item is not found"); - return false; - } - - if (cfg.get("train-prg", "prg-instance-file", strbuf)) { - train_opt.prg_instance_file = strbuf; - } else { - ERROR_LOG("[PRG] prg-instance-file config item is not found"); - return false; - } - - if (cfg.get("train-prg", "prg-model-file", strbuf)) { - train_opt.prg_model_file = strbuf; - } else { - ERROR_LOG("[PRG] prg-model-file config item is not found"); - return false; - } - - if (cfg.get("train-prg", "dst-config-dir", strbuf)) { - train_opt.dst_config_dir = strbuf; - } else { - ERROR_LOG("[PRG] dst_config_dir config item is not found"); - return false; - } - - if 
(cfg.get_integer("train-prg", "solver-type", intbuf)) { - switch (intbuf) { - case 0: me_prg_param.solver_type = L1_OWLQN; break; - case 1: me_prg_param.solver_type = L1_SGD; break; - case 2: me_prg_param.solver_type = L2_LBFGS; break; - default: - ERROR_LOG("Unsupported solver [%d]", intbuf); - break; - } - - } - - if (cfg.get_float("train-prg", "l1-reg", dblbuf)) { - me_prg_param.l1_reg = dblbuf; - } - - if (cfg.get_float("train-prg", "l2-reg", dblbuf)) { - me_prg_param.l2_reg = dblbuf; - } - - if (cfg.get_integer("train-prg", "sgd-iter", intbuf)) { - me_prg_param.sgd_iter = intbuf; - } - - if (cfg.get_float("train-prg", "sgd-eta0", dblbuf)) { - me_prg_param.sgd_eta0 = dblbuf; - } - - if (cfg.get_float("train-prg", "sgd-alpha", dblbuf)) { - me_prg_param.sgd_alpha = dblbuf; - } - - if (cfg.get_integer("train-prg", "nheldout", intbuf)) { - me_prg_param.nheldout = intbuf; - } - } - - if (cfg.has_section("test")) { - TRACE_LOG("PRG-SRL testing specified"); - - __TEST__ = true; - - if (cfg.get("test", "test-file", strbuf)) { - test_opt.test_file = strbuf; - } else { - ERROR_LOG("test-file config item is not found"); - return false; - } - - if (cfg.get("test", "config-dir", strbuf)) { - test_opt.config_dir = strbuf; - } else { - ERROR_LOG("config-dir config item is not found"); - return false; - } - - if (cfg.get("test", "output-file", strbuf)) { - test_opt.output_file = strbuf; - } else { - ERROR_LOG("output-file config item is not found"); - return false; - } - } - - return true; -} - -bool copy_cfg(const string & src_cfg, - const string & dst_cfg) -{ - ifstream fsrc(src_cfg.c_str()); - ofstream fdst(dst_cfg.c_str()); - - if (!fdst) - { - ERROR_LOG("Cannot open [%s]", dst_cfg.c_str()); - return false; - } - - string line; - while (getline(fsrc, line)) - fdst << line << endl; - - fsrc.close(); - fdst.close(); - - return true; -} - -bool collect_prg_instances() -{ - Configuration configuration(train_opt.core_config); - FeatureExtractor feature_extractor(configuration); 
- FeatureCollection feature_collection; - vector feature_numbers; - vector feature_prefixes; - - ofstream inst_stream(train_opt.prg_instance_file.c_str()); - if (!inst_stream) { - ERROR_LOG("[PRG] cannot open instance file:[%s] for writing", - train_opt.prg_instance_file.c_str()); - return false; - } - - const vector & feat_set = - configuration.get_pred_recog_config().get_feature_names(); - - for (size_t i = 0; i < feat_set.size(); ++i) { - const string& feature_name = feat_set[i]; - const int feature_number = - feature_collection.get_feature_number(feature_name); - const string& feature_prefix = - feature_collection.get_feature_prefix(feature_number); - - feature_numbers.push_back(feature_number); - feature_prefixes.push_back(feature_prefix); - } - - Corpus corpus(train_opt.prg_train_file); - vector lines; - Sentence sentence; - - size_t sentence_count = 0; - while (corpus.get_next_block(lines)) { - ++sentence_count; - - sentence.from_corpus_block(lines); - const size_t row_count = sentence.get_row_count(); - - feature_extractor.set_target_sentence(sentence); - feature_extractor.calc_node_features(); - - vector > vct_feature_values; - for (size_t i = 0; i < feature_numbers.size(); ++i) { - vector feature_values; - - const int feature_number = feature_numbers[i]; - const string& feature_prefix = feature_prefixes[i]; - bool feature_empty_flag = false; - try { - feature_extractor.get_feature_for_rows( - feature_number, feature_values); - } catch (...) { - feature_empty_flag = true; - } - - if (feature_empty_flag) { - feature_values.clear(); - for (size_t row = 1; row <= row_count; ++row) - feature_values.push_back(""); - } - vct_feature_values.push_back(feature_values); - } - - for (size_t row = 1; row <= row_count; ++row) { - inst_stream << ((sentence.get_FILLPRED(row) == "Y") ? 
'Y' : 'N'); - for (size_t i = 0; i < feature_numbers.size(); ++i) { - inst_stream << " " << feature_prefixes[i] - << "@" << vct_feature_values[i][row]; - } - inst_stream << endl; - } - } - - inst_stream.close(); - return true; -} - -bool collect_srl_instances() -{ - Configuration configuration(train_opt.core_config); - FeatureExtractor feature_extractor(configuration); - FeatureCollection feature_collection; - vector feature_numbers; - vector feature_prefixes; - - ofstream output_streams[TOTAL_FEATURE]; - ofstream label_stream; - - const vector & feat_set = - configuration.get_argu_config().get_feature_names(); - feature_numbers.clear(); - feature_prefixes.clear(); - - for (size_t i = 0; i < feat_set.size(); ++i) { - const string& feature_name = feat_set[i]; - const int feature_number - = feature_collection.get_feature_number(feature_name); - const string& feature_prefix - = feature_collection.get_feature_prefix(feature_number); - - feature_numbers.push_back(feature_number); - feature_prefixes.push_back(feature_prefix); - - string filename = train_opt.srl_feature_dir + "/" + feature_name; - output_streams[feature_number].open(filename.c_str()); - - if (!output_streams[feature_number]) { - ERROR_LOG("cannot open feature output file: [%s]", feature_name.c_str()); - return false; - } - } - - string label_filename = train_opt.srl_feature_dir + "/labels"; - label_stream.open(label_filename.c_str()); - if (!label_stream) { - ERROR_LOG("can't open labels file"); - return false; - } - - Corpus corpus(train_opt.srl_train_file); - vector lines; - Sentence sentence; - - size_t sentence_count = 0; - while (corpus.get_next_block(lines)) - { - ++sentence_count; - - sentence.from_corpus_block(lines); - const size_t predicate_count = sentence.get_predicates().size(); - const size_t row_count = sentence.get_row_count(); - - feature_extractor.set_target_sentence(sentence); - vector feature_values; - - for (size_t predicate_index = 0; predicate_index < predicate_count; - 
++predicate_index) { // loop for each predicate - feature_extractor.calc_features(predicate_index); - - for (size_t i = 0; i < feature_numbers.size(); ++i) { - const int feature_number = feature_numbers[i]; - const string& feature_prefix = feature_prefixes[i]; - bool feature_empty_flag = false; - try { - feature_extractor.get_feature_for_rows( - feature_number, feature_values); - } - catch(...) { - feature_empty_flag = true; - } - - if (feature_empty_flag) { - for (size_t row = 1; row <= row_count; ++row) - output_streams[feature_number]< vs = split(line); - ME_Sample mes(vs, true); - model.add_training_sample(mes); - } - - model.train(); - model.save(model_path); - - return true; -} - -// unused -bool prg_predict() -{ - string core_config = test_opt.config_dir + "./Chinese.xml"; - string model_file = test_opt.config_dir + "./prg.model"; - Configuration configuration(core_config); - ME_Model prg_model(model_file); - Corpus corpus(test_opt.test_file); - ofstream output(test_opt.output_file.c_str()); - - FeatureExtractor feature_extractor(configuration); - FeatureCollection feature_collection; - vector feature_numbers; - vector feature_prefixes; - - const vector& feat_set = - configuration.get_pred_recog_config().get_feature_names(); - for (size_t i = 0; i < feat_set.size(); ++i) { - const string& feature_name = feat_set[i]; - const int feature_number = - feature_collection.get_feature_number(feature_name); - const string& feature_prefix = - feature_collection.get_feature_prefix(feature_number); - - feature_numbers.push_back(feature_number); - feature_prefixes.push_back(feature_prefix); - } - - if (!output) { - ERROR_LOG("Cannot open [%s]", test_opt.output_file.c_str()); - return false; - } - - vector corpus_lines; - Sentence sentence; - - // for each sentence - size_t sentence_count = 0; - while (corpus.get_next_block(corpus_lines)) { - ++sentence_count; - - sentence.from_corpus_block(corpus_lines); - const size_t row_count = sentence.get_row_count(); - - 
feature_extractor.set_target_sentence(sentence); - feature_extractor.calc_node_features(); - - vector< vector > vct_feature_values; - for (size_t i = 0; i < feature_numbers.size(); ++i) { - vector feature_values; - - const int feature_number = feature_numbers[i]; - const string& feature_prefix = feature_prefixes[i]; - bool feature_empty_flag = false; - try { - feature_extractor.get_feature_for_rows( - feature_number, feature_values); - } catch (...) { - feature_empty_flag = true; - } - - if (feature_empty_flag) { - feature_values.clear(); - for (size_t row = 1; row <= row_count; ++row) { - feature_values.push_back(""); - } - } - vct_feature_values.push_back(feature_values); - } - - vector predicate_rows; - for (size_t row = 1; row <= row_count; ++row) { - vector< pair > outcome; - vector instance; - - for (size_t i = 0; i < feature_numbers.size(); ++i) { - string feature = - feature_prefixes[i] - + "@" - + vct_feature_values[i][row]; - instance.push_back(feature); - } - - ME_Sample mes(instance); - prg_model.predict(mes, outcome); - if (outcome[0].first == "Y") - predicate_rows.push_back(row); - } - sentence.set_predicates(predicate_rows); - - output << sentence.to_corpus_block() << endl; - } - - return true; -} - -bool predict() -{ - typedef pair > Argu; - typedef pair > ArgusForOnePredicate; - typedef vector ArgusForPredicates; - - SRL_LoadResource(test_opt.config_dir); - - ofstream output(test_opt.output_file.c_str()); - if (!output) { - ERROR_LOG("Failed to open [%s]", - test_opt.output_file.c_str()); - return false; - } - - Corpus corpus(test_opt.test_file); - vector corpus_lines; - Sentence sentence; - - while (corpus.get_next_block(corpus_lines)) { - sentence.from_corpus_block(corpus_lines); - const size_t row_count = sentence.get_row_count(); - - vector words, poss, nes; - vector< pair > parses; - for (size_t i = 1; i <= row_count; ++i) { - - words.push_back(sentence.get_FORM(i)); - poss.push_back(sentence.get_PPOS(i)); - nes.push_back("O"); // unused 
feature - - int phead = sentence.get_PHEAD(i); - string pdeprel = sentence.get_PDEPREL(i); - parses.push_back(make_pair(phead-1, pdeprel)); - } - - ArgusForPredicates srl_result; - SRL(words, poss, nes, parses, srl_result); - - vector predicate_rows; - for (size_t i = 0; i < srl_result.size(); ++i) { - - ArgusForOnePredicate argus = srl_result[i]; - size_t predicate_row = argus.first + 1; // starts from 1 - predicate_rows.push_back(predicate_row); - - } - sentence.set_predicates(predicate_rows); // make room for arguments - - for (size_t i = 0; i < srl_result.size(); ++i) { - - ArgusForOnePredicate argus = srl_result[i]; - - for (size_t j = 0; j < argus.second.size(); ++j) { - Argu argu = argus.second[j]; - sentence.set_argument(i, argu.second.first+1, argu.first); - } - } - - output << sentence.to_corpus_block() << endl; - } - - output.close(); - SRL_ReleaseResource(); - - return true; -} - -int main(int argc, char *argv[]) -{ - /* - * All params are defined in config file - * - * params: path-Chinese.xml - * params: path-srl.cfg - * params: path-corpus - * params: path-feature folder - * params: path-instances - * - * params: option-maxent-solver_type - * params: option-maxent-reg_coefficient - * params: option-maxent-sgd_iter (recommend: default) - * params: option-maxent-sgd_eta0 (recommend: default) - * params: option-maxent-sgd_alpha(recommend: default) - * params: option-maxent-heldout - * - */ - if (argc < 2) { - usage(); return -1; - } - - ConfigParser cfg(argv[1]); - - if (!cfg) { - ERROR_LOG("Failed to parse config file"); - return -1; - } - - parse_cfg(cfg); - - if (__TRAIN_PRG__) { - - // collect training instances for PRG training - TRACE_LOG("Collecting instances for PRG training"); - if (!collect_prg_instances()) { - ERROR_LOG("Failed collect prg instances"); - return -1; - } - - // training PRG model - ME_Model prg_model(me_prg_param); - TRACE_LOG("Training PRG Model"); - train(prg_model, - train_opt.prg_instance_file, - train_opt.prg_model_file); 
- copy_cfg(train_opt.core_config, - train_opt.dst_config_dir + "/Chinese.xml"); - } - - if (__TRAIN_SRL__) { - // collect training instances for SRL training - TRACE_LOG("Collecting instances for SRL training"); - if (!collect_srl_instances()) { - ERROR_LOG("Failed collect srl instances"); - return -1; - } - - // training SRL model - ME_Model srl_model(me_srl_param); - TRACE_LOG("Training SRL Model"); - train(srl_model, - train_opt.srl_instance_file, - train_opt.srl_model_file); - copy_cfg(train_opt.srl_config, - train_opt.dst_config_dir + "/srl.cfg"); - } - - if (__TEST__) { - TRACE_LOG("Predicting [%s]", test_opt.test_file.c_str()); - if (!predict()) { - TRACE_LOG("Failed predicting [%s]", - test_opt.test_file.c_str()); - return -1; - } - TRACE_LOG("Output to [%s]", test_opt.output_file.c_str()); - } - - return 0; -} - diff --git a/src/srl/options.h b/src/srl/options.h deleted file mode 100644 index 009515834..000000000 --- a/src/srl/options.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef __LTP_SRL_OPTIONS_H__ -#define __LTP_SRL_OPTIONS_H__ - -// namespace ltp { -// namespace srl { - -struct TrainOptions { - std::string prg_train_file; - std::string srl_train_file; - std::string core_config; // Chinese.xml - std::string srl_config; // srl.cfg - std::string srl_feature_dir; - std::string srl_instance_file; - std::string prg_instance_file; - std::string srl_model_file; - std::string prg_model_file; - std::string dst_config_dir; // destination cfgs -}; - -struct TestOptions { - std::string test_file; - std::string config_dir; - std::string output_file; -}; - -#endif diff --git a/src/srl/tool/CMakeLists.txt b/src/srl/tool/CMakeLists.txt new file mode 100644 index 000000000..b550ec93e --- /dev/null +++ b/src/srl/tool/CMakeLists.txt @@ -0,0 +1,5 @@ +add_executable(srl_merge_tool merge.cpp process/merge.h) +target_link_libraries(srl_merge_tool dynet srl_srl_static_lib ${LIBS}) +set_target_properties (srl_merge_tool PROPERTIES + OUTPUT_NAME srl_merge_tool + 
RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/) diff --git a/src/srl/tool/config/ToolConf.h b/src/srl/tool/config/ToolConf.h new file mode 100644 index 000000000..1f3ce903a --- /dev/null +++ b/src/srl/tool/config/ToolConf.h @@ -0,0 +1,30 @@ +// +// Created by liu on 2017/2/22. +// + +#ifndef PROJECT_TOOLCONF_H +#define PROJECT_TOOLCONF_H + +#include "base/config.h" +#include "base/debug.h" + +class MergerConfig : virtual public base::DebugConfig { +public: + string pi_config; + string srl_config; + string pi_model; + string srl_model; + string embedding; + string out_model; + MergerConfig(string confName = "Configuration"): base::DebugConfig(confName) { + registerConf ("pi_config", STRING, pi_config, "pi_config"); + registerConf ("srl_config", STRING, srl_config, "srl_config"); + registerConf ("pi_model", STRING, pi_model, "pi_model"); + registerConf ("srl_model", STRING, srl_model, "srl_model"); + registerConf ("embedding", STRING, embedding, "embedding"); + registerConf ("out_model", STRING, out_model, "out_model"); + } + +}; + +#endif //PROJECT_TOOLCONF_H diff --git a/src/srl/tool/merge.cpp b/src/srl/tool/merge.cpp new file mode 100644 index 000000000..34cc5b8a8 --- /dev/null +++ b/src/srl/tool/merge.cpp @@ -0,0 +1,12 @@ +// +// Created by liu on 2017/5/24. +// + +#include "process/merge.h" +#include "base/processLoader.h" + +using namespace std; +int main(int argc, char * argv[]) { + base::ProcessLoader processLoader(argc, argv); + return processLoader.runProcess(); +} \ No newline at end of file diff --git a/src/srl/tool/process/merge.h b/src/srl/tool/process/merge.h new file mode 100644 index 000000000..cf1f36e85 --- /dev/null +++ b/src/srl/tool/process/merge.h @@ -0,0 +1,59 @@ +// +// Created by liu on 2017/5/24. 
+// + +#ifndef BILSTM_SRL_MERGE_H +#define BILSTM_SRL_MERGE_H + +#include +#include "base/process.h" +#include "../config/ToolConf.h" +#include "dynet/dynet.h" +#include "../../Srl/config/SrlSrlConfig.h" +#include "../../Pi/config/SrlPiConfig.h" +#include "../../Pi/model/SrlPiModel.h" +#include "../../Srl/model/SrlSrlModel.h" + +class Merge : public base::Process { +public: + Merge(MergerConfig &config) : base::Process (config){} + + virtual void main() { + dynet::DynetParams params; + params.mem_descriptor = "2000"; + dynet::initialize(params); + + SrlPiBaseConfig piConfig; + SrlSrlBaseConfig srlConfig; + piConfig.init(config.pi_config); piConfig.model = config.pi_model; + srlConfig.init(config.srl_config); srlConfig.model = config.srl_model; + + ExtractorFileToWordEmb conv; + conv.init(config.embedding); + unordered_map> embedding = conv.run(); + + PiModel pi_model(piConfig); + pi_model.loadDict(); + pi_model.init(); + pi_model.load(); + pi_model.initEmbedding(embedding); + + SrlSrlModel srl_model(srlConfig); + srl_model.loadDict(); + srl_model.init(); + srl_model.load(); + srl_model.initEmbedding(embedding); + + ofstream out(config.out_model); + boost::archive::binary_oarchive oa(out); + oa << piConfig; + oa << srlConfig; + oa << embedding; + pi_model.save(oa); + srl_model.save(oa); + out.close(); + } +}; + + +#endif //BILSTM_SRL_MERGE_H diff --git a/src/srl/tree.hh b/src/srl/tree.hh deleted file mode 100644 index 4ca92d5af..000000000 --- a/src/srl/tree.hh +++ /dev/null @@ -1,2685 +0,0 @@ -/* - - $Id: tree.hh,v 1.150 2008/02/28 21:04:33 peekas Exp $ - - STL-like templated tree class. - Copyright (C) 2001-2006 Kasper Peeters . - -*/ - -/** \mainpage tree.hh - \author Kasper Peeters - \version 2.51 - \date 28-Feb-2008 - \see http://www.aei.mpg.de/~peekas/tree/ - \see http://www.aei.mpg.de/~peekas/tree/ChangeLog - - The tree.hh library for C++ provides an STL-like container class - for n-ary trees, templated over the data stored at the - nodes. 
Various types of iterators are provided (post-order, - pre-order, and others). Where possible the access methods are - compatible with the STL or alternative algorithms are - available. -*/ - - -/* - The tree.hh code is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 or 3. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -/** \todo - - New-style move members are not completely finished yet. - - It would be good to have an iterator which can iterate over all - nodes below a given node. Something similar to the leaf iterator - we have right now, but not restricted to the leaves. - - If a range uses const iter_base& as end iterator, things will - inevitably go wrong, because upcast from iter_base to a non-sibling_iter - is incorrect. This upcast should be removed (and then all illegal uses - as previously in 'equal' will be flagged by the compiler). This requires - new copy constructors though. - - There's a bug in replace(sibling_iterator, ...) when the ranges - sit next to each other. Turned up in append_child(iter,iter) - but has been avoided now. - - "std::operator<" does not work correctly on our iterators, and for some - reason a globally defined template operator< did not get picked up. - Using a comparison class now, but this should be investigated. -*/ - -#ifndef tree_hh_ -#define tree_hh_ - -#include -#include -#include -#include -#include -#include -#include - -// HP-style construct/destroy have gone from the standard, -// so here is a copy. 
- -namespace kp { - -template -void constructor(T1* p, T2& val) - { - new ((void *) p) T1(val); - } - -template -void constructor(T1* p) - { - new ((void *) p) T1; - } - -template -void destructor(T1* p) - { - p->~T1(); - } - -}; - -/// A node in the tree, combining links to other nodes as well as the actual data. -template -class tree_node_ { // size: 5*4=20 bytes (on 32 bit arch), can be reduced by 8. - public: - tree_node_ *parent; - tree_node_ *first_child, *last_child; - tree_node_ *prev_sibling, *next_sibling; - T data; -}; // __attribute__((packed)); - -template > > -class tree { - protected: - typedef tree_node_ tree_node; - public: - /// Value of the data stored at a node. - typedef T value_type; - - class iterator_base; - class pre_order_iterator; - class post_order_iterator; - class sibling_iterator; - class leaf_iterator; - - tree(); - tree(const T&); - tree(const iterator_base&); - tree(const tree&); - ~tree(); - void operator=(const tree&); - - /// Base class for iterators, only pointers stored, no traversal logic. -#ifdef __SGI_STL_PORT - class iterator_base : public stlport::bidirectional_iterator { -#else - class iterator_base { -#endif - public: - typedef T value_type; - typedef T* pointer; - typedef T& reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef std::bidirectional_iterator_tag iterator_category; - - iterator_base(); - iterator_base(tree_node *); - - T& operator*() const; - T* operator->() const; - - /// When called, the next increment/decrement skips children of this node. - void skip_children(); - /// Number of children of the node pointed to by the iterator. - unsigned int number_of_children() const; - - sibling_iterator begin() const; - sibling_iterator end() const; - - tree_node *node; - protected: - bool skip_current_children_; - }; - - /// Depth-first iterator, first accessing the node, then its children. 
- class pre_order_iterator : public iterator_base { - public: - pre_order_iterator(); - pre_order_iterator(tree_node *); - pre_order_iterator(const iterator_base&); - pre_order_iterator(const sibling_iterator&); - - bool operator==(const pre_order_iterator&) const; - bool operator!=(const pre_order_iterator&) const; - pre_order_iterator& operator++(); - pre_order_iterator& operator--(); - pre_order_iterator operator++(int); - pre_order_iterator operator--(int); - pre_order_iterator& operator+=(unsigned int); - pre_order_iterator& operator-=(unsigned int); - }; - - /// Depth-first iterator, first accessing the children, then the node itself. - class post_order_iterator : public iterator_base { - public: - post_order_iterator(); - post_order_iterator(tree_node *); - post_order_iterator(const iterator_base&); - post_order_iterator(const sibling_iterator&); - - bool operator==(const post_order_iterator&) const; - bool operator!=(const post_order_iterator&) const; - post_order_iterator& operator++(); - post_order_iterator& operator--(); - post_order_iterator operator++(int); - post_order_iterator operator--(int); - post_order_iterator& operator+=(unsigned int); - post_order_iterator& operator-=(unsigned int); - - /// Set iterator to the first child as deep as possible down the tree. - void descend_all(); - }; - - /// Breadth-first iterator, using a queue - class breadth_first_queued_iterator : public iterator_base { - public: - breadth_first_queued_iterator(); - breadth_first_queued_iterator(tree_node *); - breadth_first_queued_iterator(const iterator_base&); - - bool operator==(const breadth_first_queued_iterator&) const; - bool operator!=(const breadth_first_queued_iterator&) const; - breadth_first_queued_iterator& operator++(); - breadth_first_queued_iterator operator++(int); - breadth_first_queued_iterator& operator+=(unsigned int); - - private: - std::queue traversal_queue; - }; - - /// The default iterator types throughout the tree class. 
- typedef pre_order_iterator iterator; - typedef breadth_first_queued_iterator breadth_first_iterator; - - /// Iterator which traverses only the nodes at a given depth from the root. - class fixed_depth_iterator : public iterator_base { - public: - fixed_depth_iterator(); - fixed_depth_iterator(tree_node *); - fixed_depth_iterator(const iterator_base&); - fixed_depth_iterator(const sibling_iterator&); - fixed_depth_iterator(const fixed_depth_iterator&); - - bool operator==(const fixed_depth_iterator&) const; - bool operator!=(const fixed_depth_iterator&) const; - fixed_depth_iterator& operator++(); - fixed_depth_iterator& operator--(); - fixed_depth_iterator operator++(int); - fixed_depth_iterator operator--(int); - fixed_depth_iterator& operator+=(unsigned int); - fixed_depth_iterator& operator-=(unsigned int); - - tree_node *first_parent_; - private: - void set_first_parent_(); - void find_leftmost_parent_(); - }; - - /// Iterator which traverses only the nodes which are siblings of each other. - class sibling_iterator : public iterator_base { - public: - sibling_iterator(); - sibling_iterator(tree_node *); - sibling_iterator(const sibling_iterator&); - sibling_iterator(const iterator_base&); - - bool operator==(const sibling_iterator&) const; - bool operator!=(const sibling_iterator&) const; - sibling_iterator& operator++(); - sibling_iterator& operator--(); - sibling_iterator operator++(int); - sibling_iterator operator--(int); - sibling_iterator& operator+=(unsigned int); - sibling_iterator& operator-=(unsigned int); - - tree_node *range_first() const; - tree_node *range_last() const; - tree_node *parent_; - private: - void set_parent_(); - }; - - /// Iterator which traverses only the leaves. 
- class leaf_iterator : public iterator_base { - public: - leaf_iterator(); - leaf_iterator(tree_node *, tree_node *top=0); - leaf_iterator(const sibling_iterator&); - leaf_iterator(const iterator_base&); - - bool operator==(const leaf_iterator&) const; - bool operator!=(const leaf_iterator&) const; - leaf_iterator& operator++(); - leaf_iterator& operator--(); - leaf_iterator operator++(int); - leaf_iterator operator--(int); - leaf_iterator& operator+=(unsigned int); - leaf_iterator& operator-=(unsigned int); - private: - tree_node *top_node; - }; - - /// Return iterator to the beginning of the tree. - inline pre_order_iterator begin() const; - /// Return iterator to the end of the tree. - inline pre_order_iterator end() const; - /// Return post-order iterator to the beginning of the tree. - post_order_iterator begin_post() const; - /// Return post-order end iterator of the tree. - post_order_iterator end_post() const; - /// Return fixed-depth iterator to the first node at a given depth from the given iterator. - fixed_depth_iterator begin_fixed(const iterator_base&, unsigned int) const; - /// Return fixed-depth end iterator. - fixed_depth_iterator end_fixed(const iterator_base&, unsigned int) const; - /// Return breadth-first iterator to the first node at a given depth. - breadth_first_queued_iterator begin_breadth_first() const; - /// Return breadth-first end iterator. - breadth_first_queued_iterator end_breadth_first() const; - /// Return sibling iterator to the first child of given node. - sibling_iterator begin(const iterator_base&) const; - /// Return sibling end iterator for children of given node. - sibling_iterator end(const iterator_base&) const; - /// Return leaf iterator to the first leaf of the tree. - leaf_iterator begin_leaf() const; - /// Return leaf end iterator for entire tree. - leaf_iterator end_leaf() const; - /// Return leaf iterator to the first leaf of the subtree at the given node. 
- leaf_iterator begin_leaf(const iterator_base& top) const; - /// Return leaf end iterator for the subtree at the given node. - leaf_iterator end_leaf(const iterator_base& top) const; - - /// Return iterator to the parent of a node. - template static iter parent(iter); - /// Return iterator to the previous sibling of a node. - template iter previous_sibling(iter) const; - /// Return iterator to the next sibling of a node. - template iter next_sibling(iter) const; - /// Return iterator to the next node at a given depth. - template iter next_at_same_depth(iter) const; - - /// Erase all nodes of the tree. - void clear(); - /// Erase element at position pointed to by iterator, return incremented iterator. - template iter erase(iter); - /// Erase all children of the node pointed to by iterator. - void erase_children(const iterator_base&); - - /// Insert empty node as last/first child of node pointed to by position. - template iter append_child(iter position); - template iter prepend_child(iter position); - /// Insert node as last/first child of node pointed to by position. - template iter append_child(iter position, const T& x); - template iter prepend_child(iter position, const T& x); - /// Append the node (plus its children) at other_position as last/first child of position. - template iter append_child(iter position, iter other_position); - template iter prepend_child(iter position, iter other_position); - /// Append the nodes in the from-to range (plus their children) as last/first children of position. - template iter append_children(iter position, sibling_iterator from, sibling_iterator to); - template iter prepend_children(iter position, sibling_iterator from, sibling_iterator to); - - /// Short-hand to insert topmost node in otherwise empty tree. - pre_order_iterator set_head(const T& x); - /// Insert node as previous sibling of node pointed to by position. - template iter insert(iter position, const T& x); - /// Specialisation of previous member. 
- sibling_iterator insert(sibling_iterator position, const T& x); - /// Insert node (with children) pointed to by subtree as previous sibling of node pointed to by position. - template iter insert_subtree(iter position, const iterator_base& subtree); - /// Insert node as next sibling of node pointed to by position. - template iter insert_after(iter position, const T& x); - /// Insert node (with children) pointed to by subtree as next sibling of node pointed to by position. - template iter insert_subtree_after(iter position, const iterator_base& subtree); - - /// Replace node at 'position' with other node (keeping same children); 'position' becomes invalid. - template iter replace(iter position, const T& x); - /// Replace node at 'position' with subtree starting at 'from' (do not erase subtree at 'from'); see above. - template iter replace(iter position, const iterator_base& from); - /// Replace string of siblings (plus their children) with copy of a new string (with children); see above - sibling_iterator replace(sibling_iterator orig_begin, sibling_iterator orig_end, - sibling_iterator new_begin, sibling_iterator new_end); - - /// Move all children of node at 'position' to be siblings, returns position. - template iter flatten(iter position); - /// Move nodes in range to be children of 'position'. - template iter reparent(iter position, sibling_iterator begin, sibling_iterator end); - /// Move all child nodes of 'from' to be children of 'position'. - template iter reparent(iter position, iter from); - - /// Replace node with a new node, making the old node a child of the new node. - template iter wrap(iter position, const T& x); - - /// Move 'source' node (plus its children) to become the next sibling of 'target'. - template iter move_after(iter target, iter source); - /// Move 'source' node (plus its children) to become the previous sibling of 'target'. 
- template iter move_before(iter target, iter source); - sibling_iterator move_before(sibling_iterator target, sibling_iterator source); - /// Move 'source' node (plus its children) to become the node at 'target' (erasing the node at 'target'). - template iter move_ontop(iter target, iter source); - - /// Merge with other tree, creating new branches and leaves only if they are not already present. - void merge(sibling_iterator, sibling_iterator, sibling_iterator, sibling_iterator, - bool duplicate_leaves=false); - /// Sort (std::sort only moves values of nodes, this one moves children as well). - void sort(sibling_iterator from, sibling_iterator to, bool deep=false); - template - void sort(sibling_iterator from, sibling_iterator to, StrictWeakOrdering comp, bool deep=false); - /// Compare two ranges of nodes (compares nodes as well as tree structure). - template - bool equal(const iter& one, const iter& two, const iter& three) const; - template - bool equal(const iter& one, const iter& two, const iter& three, BinaryPredicate) const; - template - bool equal_subtree(const iter& one, const iter& two) const; - template - bool equal_subtree(const iter& one, const iter& two, BinaryPredicate) const; - /// Extract a new tree formed by the range of siblings plus all their children. - tree subtree(sibling_iterator from, sibling_iterator to) const; - void subtree(tree&, sibling_iterator from, sibling_iterator to) const; - /// Exchange the node (plus subtree) with its sibling node (do nothing if no sibling present). - void swap(sibling_iterator it); - /// Exchange two nodes (plus subtrees) - void swap(iterator, iterator); - - /// Count the total number of nodes. - int size() const; - /// Count the total number of nodes below the indicated node (plus one). - int size(const iterator_base&) const; - /// Check if tree is empty. - bool empty() const; - /// Compute the depth to the root. - int depth(const iterator_base&) const; - /// Determine the maximal depth of the tree. 
- int max_depth() const; - /// Determine the maximal depth of the tree below a given one. - int max_depth(const iterator_base&) const; - /// Count the number of children of node at position. - static unsigned int number_of_children(const iterator_base&); - /// Count the number of 'next' siblings of node at iterator. - unsigned int number_of_siblings(const iterator_base&) const; - /// Determine whether node at position is in the subtrees with root in the range. - bool is_in_subtree(const iterator_base& position, const iterator_base& begin, - const iterator_base& end) const; - /// Determine whether the iterator is an 'end' iterator and thus not actually pointing to a node. - bool is_valid(const iterator_base&) const; - - /// Determine the index of a node in the range of siblings to which it belongs. - unsigned int index(sibling_iterator it) const; - /// Inverse of 'index': return the n-th child of the node at position. - sibling_iterator child(const iterator_base& position, unsigned int) const; - - /// Comparator class for iterators (compares pointer values; why doesn't this work automatically?) - class iterator_base_less { - public: - bool operator()(const typename tree::iterator_base& one, - const typename tree::iterator_base& two) const - { - return one.node < two.node; - } - }; - tree_node *head, *feet; // head/feet are always dummy; if an iterator points to them it is invalid - private: - tree_node_allocator alloc_; - void head_initialise_(); - void copy_(const tree& other); - - /// Comparator class for two nodes of a tree (used for sorting and searching). 
- template - class compare_nodes { - public: - compare_nodes(StrictWeakOrdering comp) : comp_(comp) {}; - - bool operator()(const tree_node *a, const tree_node *b) - { - static StrictWeakOrdering comp; - return comp(a->data, b->data); - } - private: - StrictWeakOrdering comp_; - }; -}; - -//template -//class iterator_base_less { -// public: -// bool operator()(const typename tree::iterator_base& one, -// const typename tree::iterator_base& two) const -// { -// txtout << "operatorclass<" << one.node < two.node << std::endl; -// return one.node < two.node; -// } -//}; - -// template -// bool operator<(const typename tree::iterator& one, -// const typename tree::iterator& two) -// { -// txtout << "operator< " << one.node < two.node << std::endl; -// if(one.node < two.node) return true; -// return false; -// } -// -// template -// bool operator==(const typename tree::iterator& one, -// const typename tree::iterator& two) -// { -// txtout << "operator== " << one.node == two.node << std::endl; -// if(one.node == two.node) return true; -// return false; -// } -// -// template -// bool operator>(const typename tree::iterator_base& one, -// const typename tree::iterator_base& two) -// { -// txtout << "operator> " << one.node < two.node << std::endl; -// if(one.node > two.node) return true; -// return false; -// } - - - -// Tree - -template -tree::tree() - { - head_initialise_(); - } - -template -tree::tree(const T& x) - { - head_initialise_(); - set_head(x); - } - -template -tree::tree(const iterator_base& other) - { - head_initialise_(); - set_head((*other)); - replace(begin(), other); - } - -template -tree::~tree() - { - clear(); - alloc_.deallocate(head,1); - alloc_.deallocate(feet,1); - } - -template -void tree::head_initialise_() - { - head = alloc_.allocate(1,0); // MSVC does not have default second argument - feet = alloc_.allocate(1,0); - - head->parent=0; - head->first_child=0; - head->last_child=0; - head->prev_sibling=0; //head; - head->next_sibling=feet; //head; 
- - feet->parent=0; - feet->first_child=0; - feet->last_child=0; - feet->prev_sibling=head; - feet->next_sibling=0; - } - -template -void tree::operator=(const tree& other) - { - copy_(other); - } - -template -tree::tree(const tree& other) - { - head_initialise_(); - copy_(other); - } - -template -void tree::copy_(const tree& other) - { - clear(); - pre_order_iterator it=other.begin(), to=begin(); - while(it!=other.end()) { - to=insert(to, (*it)); - it.skip_children(); - ++it; - } - to=begin(); - it=other.begin(); - while(it!=other.end()) { - to=replace(to, it); - to.skip_children(); - it.skip_children(); - ++to; - ++it; - } - } - -template -void tree::clear() - { - if(head) - while(head->next_sibling!=feet) - erase(pre_order_iterator(head->next_sibling)); - } - -template -void tree::erase_children(const iterator_base& it) - { -// std::cout << "erase_children " << it.node << std::endl; - if(it.node==0) return; - - tree_node *cur=it.node->first_child; - tree_node *prev=0; - - while(cur!=0) { - prev=cur; - cur=cur->next_sibling; - erase_children(pre_order_iterator(prev)); - kp::destructor(&prev->data); - alloc_.deallocate(prev,1); - } - it.node->first_child=0; - it.node->last_child=0; -// std::cout << "exit" << std::endl; - } - -template -template -iter tree::erase(iter it) - { - tree_node *cur=it.node; - assert(cur!=head); - iter ret=it; - ret.skip_children(); - ++ret; - erase_children(it); - if(cur->prev_sibling==0) { - cur->parent->first_child=cur->next_sibling; - } - else { - cur->prev_sibling->next_sibling=cur->next_sibling; - } - if(cur->next_sibling==0) { - cur->parent->last_child=cur->prev_sibling; - } - else { - cur->next_sibling->prev_sibling=cur->prev_sibling; - } - - kp::destructor(&cur->data); - alloc_.deallocate(cur,1); - return ret; - } - -template -typename tree::pre_order_iterator tree::begin() const - { - return pre_order_iterator(head->next_sibling); - } - -template -typename tree::pre_order_iterator tree::end() const - { - return 
pre_order_iterator(feet); - } - -template -typename tree::breadth_first_queued_iterator tree::begin_breadth_first() const - { - return breadth_first_queued_iterator(head->next_sibling); - } - -template -typename tree::breadth_first_queued_iterator tree::end_breadth_first() const - { - return breadth_first_queued_iterator(); - } - -template -typename tree::post_order_iterator tree::begin_post() const - { - tree_node *tmp=head->next_sibling; - if(tmp!=feet) { - while(tmp->first_child) - tmp=tmp->first_child; - } - return post_order_iterator(tmp); - } - -template -typename tree::post_order_iterator tree::end_post() const - { - return post_order_iterator(feet); - } - -template -typename tree::fixed_depth_iterator tree::begin_fixed(const iterator_base& pos, unsigned int dp) const - { - tree_node *tmp=pos.node; - unsigned int curdepth=0; - while(curdepthfirst_child==0) { - if(tmp->next_sibling==0) { - // try to walk up and then right again - do { - tmp=tmp->parent; - if(tmp==0) - throw std::range_error("tree: begin_fixed out of range"); - --curdepth; - } while(tmp->next_sibling==0); - } - tmp=tmp->next_sibling; - } - tmp=tmp->first_child; - ++curdepth; - } - return tmp; - } - -template -typename tree::fixed_depth_iterator tree::end_fixed(const iterator_base& pos, unsigned int dp) const - { - assert(1==0); // FIXME: not correct yet: use is_valid() as a temporary workaround - tree_node *tmp=pos.node; - unsigned int curdepth=1; - while(curdepthfirst_child==0) { - tmp=tmp->next_sibling; - if(tmp==0) - throw std::range_error("tree: end_fixed out of range"); - } - tmp=tmp->first_child; - ++curdepth; - } - return tmp; - } - -template -typename tree::sibling_iterator tree::begin(const iterator_base& pos) const - { - assert(pos.node!=0); - if(pos.node->first_child==0) { - return end(pos); - } - return pos.node->first_child; - } - -template -typename tree::sibling_iterator tree::end(const iterator_base& pos) const - { - sibling_iterator ret(0); - ret.parent_=pos.node; - return 
ret; - } - -template -typename tree::leaf_iterator tree::begin_leaf() const - { - tree_node *tmp=head->next_sibling; - if(tmp!=feet) { - while(tmp->first_child) - tmp=tmp->first_child; - } - return leaf_iterator(tmp); - } - -template -typename tree::leaf_iterator tree::end_leaf() const - { - return leaf_iterator(feet); - } - -template -typename tree::leaf_iterator tree::begin_leaf(const iterator_base& top) const - { - tree_node *tmp=top.node; - while(tmp->first_child) - tmp=tmp->first_child; - return leaf_iterator(tmp, top.node); - } - -template -typename tree::leaf_iterator tree::end_leaf(const iterator_base& top) const - { - return leaf_iterator(top.node, top.node); - } - -template -template -iter tree::parent(iter position) - { - assert(position.node!=0); - return iter(position.node->parent); - } - -template -template -iter tree::previous_sibling(iter position) const - { - assert(position.node!=0); - iter ret(position); - ret.node=position.node->prev_sibling; - return ret; - } - -template -template -iter tree::next_sibling(iter position) const - { - assert(position.node!=0); - iter ret(position); - ret.node=position.node->next_sibling; - return ret; - } - -template -template -iter tree::next_at_same_depth(iter position) const - { - assert(position.node!=0); - iter ret(position); - - if(position.node->next_sibling) { - ret.node=position.node->next_sibling; - } - else { - int relative_depth=0; - upper: - do { - ret.node=ret.node->parent; - if(ret.node==0) return ret; - --relative_depth; - } while(ret.node->next_sibling==0); - lower: - ret.node=ret.node->next_sibling; - while(ret.node->first_child==0) { - if(ret.node->next_sibling==0) - goto upper; - ret.node=ret.node->next_sibling; - if(ret.node==0) return ret; - } - while(relative_depth<0 && ret.node->first_child!=0) { - ret.node=ret.node->first_child; - ++relative_depth; - } - if(relative_depth<0) { - if(ret.node->next_sibling==0) goto upper; - else goto lower; - } - } - return ret; - } - -template -template 
-iter tree::append_child(iter position) - { - assert(position.node!=head); - assert(position.node); - - tree_node *tmp=alloc_.allocate(1,0); - kp::constructor(&tmp->data); - tmp->first_child=0; - tmp->last_child=0; - - tmp->parent=position.node; - if(position.node->last_child!=0) { - position.node->last_child->next_sibling=tmp; - } - else { - position.node->first_child=tmp; - } - tmp->prev_sibling=position.node->last_child; - position.node->last_child=tmp; - tmp->next_sibling=0; - return tmp; - } - -template -template -iter tree::prepend_child(iter position) - { - assert(position.node!=head); - assert(position.node); - - tree_node *tmp=alloc_.allocate(1,0); - kp::constructor(&tmp->data); - tmp->first_child=0; - tmp->last_child=0; - - tmp->parent=position.node; - if(position.node->first_child!=0) { - position.node->first_child->prev_sibling=tmp; - } - else { - position.node->last_child=tmp; - } - tmp->next_sibling=position.node->first_child; - position.node->prev_child=tmp; - tmp->prev_sibling=0; - return tmp; - } - -template -template -iter tree::append_child(iter position, const T& x) - { - // If your program fails here you probably used 'append_child' to add the top - // node to an empty tree. From version 1.45 the top element should be added - // using 'insert'. See the documentation for further information, and sorry about - // the API change. 
- assert(position.node!=head); - assert(position.node); - - tree_node* tmp = alloc_.allocate(1,0); - kp::constructor(&tmp->data, x); - tmp->first_child=0; - tmp->last_child=0; - - tmp->parent=position.node; - if(position.node->last_child!=0) { - position.node->last_child->next_sibling=tmp; - } - else { - position.node->first_child=tmp; - } - tmp->prev_sibling=position.node->last_child; - position.node->last_child=tmp; - tmp->next_sibling=0; - return tmp; - } - -template -template -iter tree::prepend_child(iter position, const T& x) - { - assert(position.node!=head); - assert(position.node); - - tree_node* tmp = alloc_.allocate(1,0); - kp::constructor(&tmp->data, x); - tmp->first_child=0; - tmp->last_child=0; - - tmp->parent=position.node; - if(position.node->first_child!=0) { - position.node->first_child->prev_sibling=tmp; - } - else { - position.node->last_child=tmp; - } - tmp->next_sibling=position.node->first_child; - position.node->first_child=tmp; - tmp->prev_sibling=0; - return tmp; - } - -template -template -iter tree::append_child(iter position, iter other) - { - assert(position.node!=head); - assert(position.node); - - sibling_iterator aargh=append_child(position, value_type()); - return replace(aargh, other); - } - -template -template -iter tree::prepend_child(iter position, iter other) - { - assert(position.node!=head); - assert(position.node); - - sibling_iterator aargh=prepend_child(position, value_type()); - return replace(aargh, other); - } - -template -template -iter tree::append_children(iter position, sibling_iterator from, sibling_iterator to) - { - assert(position.node!=head); - assert(position.node); - - iter ret=from; - - while(from!=to) { - insert_subtree(position.end(), from); - ++from; - } - return ret; - } - -template -template -iter tree::prepend_children(iter position, sibling_iterator from, sibling_iterator to) - { - assert(position.node!=head); - assert(position.node); - - iter ret=from; - - while(from!=to) { - 
insert_subtree(position.begin(), from); - ++from; - } - return ret; - } - -template -typename tree::pre_order_iterator tree::set_head(const T& x) - { - assert(head->next_sibling==feet); - return insert(iterator(feet), x); - } - -template -template -iter tree::insert(iter position, const T& x) - { - if(position.node==0) { - position.node=feet; // Backward compatibility: when calling insert on a null node, - // insert before the feet. - } - tree_node* tmp = alloc_.allocate(1,0); - kp::constructor(&tmp->data, x); - tmp->first_child=0; - tmp->last_child=0; - - tmp->parent=position.node->parent; - tmp->next_sibling=position.node; - tmp->prev_sibling=position.node->prev_sibling; - position.node->prev_sibling=tmp; - - if(tmp->prev_sibling==0) { - if(tmp->parent) // when inserting nodes at the head, there is no parent - tmp->parent->first_child=tmp; - } - else - tmp->prev_sibling->next_sibling=tmp; - return tmp; - } - -template -typename tree::sibling_iterator tree::insert(sibling_iterator position, const T& x) - { - tree_node* tmp = alloc_.allocate(1,0); - kp::constructor(&tmp->data, x); - tmp->first_child=0; - tmp->last_child=0; - - tmp->next_sibling=position.node; - if(position.node==0) { // iterator points to end of a subtree - tmp->parent=position.parent_; - tmp->prev_sibling=position.range_last(); - tmp->parent->last_child=tmp; - } - else { - tmp->parent=position.node->parent; - tmp->prev_sibling=position.node->prev_sibling; - position.node->prev_sibling=tmp; - } - - if(tmp->prev_sibling==0) { - if(tmp->parent) // when inserting nodes at the head, there is no parent - tmp->parent->first_child=tmp; - } - else - tmp->prev_sibling->next_sibling=tmp; - return tmp; - } - -template -template -iter tree::insert_after(iter position, const T& x) - { - tree_node* tmp = alloc_.allocate(1,0); - kp::constructor(&tmp->data, x); - tmp->first_child=0; - tmp->last_child=0; - - tmp->parent=position.node->parent; - tmp->prev_sibling=position.node; - 
tmp->next_sibling=position.node->next_sibling; - position.node->next_sibling=tmp; - - if(tmp->next_sibling==0) { - if(tmp->parent) // when inserting nodes at the head, there is no parent - tmp->parent->last_child=tmp; - } - else { - tmp->next_sibling->prev_sibling=tmp; - } - return tmp; - } - -template -template -iter tree::insert_subtree(iter position, const iterator_base& subtree) - { - // insert dummy - iter it=insert(position, value_type()); - // replace dummy with subtree - return replace(it, subtree); - } - -template -template -iter tree::insert_subtree_after(iter position, const iterator_base& subtree) - { - // insert dummy - iter it=insert_after(position, value_type()); - // replace dummy with subtree - return replace(it, subtree); - } - -// template -// template -// iter tree::insert_subtree(sibling_iterator position, iter subtree) -// { -// // insert dummy -// iter it(insert(position, value_type())); -// // replace dummy with subtree -// return replace(it, subtree); -// } - -template -template -iter tree::replace(iter position, const T& x) - { - kp::destructor(&position.node->data); - kp::constructor(&position.node->data, x); - return position; - } - -template -template -iter tree::replace(iter position, const iterator_base& from) - { - assert(position.node!=head); - tree_node *current_from=from.node; - tree_node *start_from=from.node; - tree_node *current_to =position.node; - - // replace the node at position with head of the replacement tree at from -// std::cout << "warning!" << position.node << std::endl; - erase_children(position); -// std::cout << "no warning!" 
<< std::endl; - tree_node* tmp = alloc_.allocate(1,0); - kp::constructor(&tmp->data, (*from)); - tmp->first_child=0; - tmp->last_child=0; - if(current_to->prev_sibling==0) { - if(current_to->parent!=0) - current_to->parent->first_child=tmp; - } - else { - current_to->prev_sibling->next_sibling=tmp; - } - tmp->prev_sibling=current_to->prev_sibling; - if(current_to->next_sibling==0) { - if(current_to->parent!=0) - current_to->parent->last_child=tmp; - } - else { - current_to->next_sibling->prev_sibling=tmp; - } - tmp->next_sibling=current_to->next_sibling; - tmp->parent=current_to->parent; - kp::destructor(¤t_to->data); - alloc_.deallocate(current_to,1); - current_to=tmp; - - // only at this stage can we fix 'last' - tree_node *last=from.node->next_sibling; - - pre_order_iterator toit=tmp; - // copy all children - do { - assert(current_from!=0); - if(current_from->first_child != 0) { - current_from=current_from->first_child; - toit=append_child(toit, current_from->data); - } - else { - while(current_from->next_sibling==0 && current_from!=start_from) { - current_from=current_from->parent; - toit=parent(toit); - assert(current_from!=0); - } - current_from=current_from->next_sibling; - if(current_from!=last) { - toit=append_child(parent(toit), current_from->data); - } - } - } while(current_from!=last); - - return current_to; - } - -template -typename tree::sibling_iterator tree::replace( - sibling_iterator orig_begin, - sibling_iterator orig_end, - sibling_iterator new_begin, - sibling_iterator new_end) - { - tree_node *orig_first=orig_begin.node; - tree_node *new_first=new_begin.node; - tree_node *orig_last=orig_first; - while((++orig_begin)!=orig_end) - orig_last=orig_last->next_sibling; - tree_node *new_last=new_first; - while((++new_begin)!=new_end) - new_last=new_last->next_sibling; - - // insert all siblings in new_first..new_last before orig_first - bool first=true; - pre_order_iterator ret; - while(1==1) { - pre_order_iterator 
tt=insert_subtree(pre_order_iterator(orig_first), pre_order_iterator(new_first)); - if(first) { - ret=tt; - first=false; - } - if(new_first==new_last) - break; - new_first=new_first->next_sibling; - } - - // erase old range of siblings - bool last=false; - tree_node *next=orig_first; - while(1==1) { - if(next==orig_last) - last=true; - next=next->next_sibling; - erase((pre_order_iterator)orig_first); - if(last) - break; - orig_first=next; - } - return ret; - } - -template -template -iter tree::flatten(iter position) - { - if(position.node->first_child==0) - return position; - - tree_node *tmp=position.node->first_child; - while(tmp) { - tmp->parent=position.node->parent; - tmp=tmp->next_sibling; - } - if(position.node->next_sibling) { - position.node->last_child->next_sibling=position.node->next_sibling; - position.node->next_sibling->prev_sibling=position.node->last_child; - } - else { - position.node->parent->last_child=position.node->last_child; - } - position.node->next_sibling=position.node->first_child; - position.node->next_sibling->prev_sibling=position.node; - position.node->first_child=0; - position.node->last_child=0; - - return position; - } - - -template -template -iter tree::reparent(iter position, sibling_iterator begin, sibling_iterator end) - { - tree_node *first=begin.node; - tree_node *last=first; - - assert(first!=position.node); - - if(begin==end) return begin; - // determine last node - while((++begin)!=end) { - last=last->next_sibling; - } - // move subtree - if(first->prev_sibling==0) { - first->parent->first_child=last->next_sibling; - } - else { - first->prev_sibling->next_sibling=last->next_sibling; - } - if(last->next_sibling==0) { - last->parent->last_child=first->prev_sibling; - } - else { - last->next_sibling->prev_sibling=first->prev_sibling; - } - if(position.node->first_child==0) { - position.node->first_child=first; - position.node->last_child=last; - first->prev_sibling=0; - } - else { - 
position.node->last_child->next_sibling=first; - first->prev_sibling=position.node->last_child; - position.node->last_child=last; - } - last->next_sibling=0; - - tree_node *pos=first; - while(1==1) { - pos->parent=position.node; - if(pos==last) break; - pos=pos->next_sibling; - } - - return first; - } - -template -template iter tree::reparent(iter position, iter from) - { - if(from.node->first_child==0) return position; - return reparent(position, from.node->first_child, end(from)); - } - -template -template iter tree::wrap(iter position, const T& x) - { - assert(position.node!=0); - sibling_iterator fr=position, to=position; - ++to; - iter ret = insert(position, x); - reparent(ret, fr, to); - return ret; - } - -template -template iter tree::move_after(iter target, iter source) - { - tree_node *dst=target.node; - tree_node *src=source.node; - assert(dst); - assert(src); - - if(dst==src) return source; - if(dst->next_sibling) - if(dst->next_sibling==src) // already in the right spot - return source; - - // take src out of the tree - if(src->prev_sibling!=0) src->prev_sibling->next_sibling=src->next_sibling; - else src->parent->first_child=src->next_sibling; - if(src->next_sibling!=0) src->next_sibling->prev_sibling=src->prev_sibling; - else src->parent->last_child=src->prev_sibling; - - // connect it to the new point - if(dst->next_sibling!=0) dst->next_sibling->prev_sibling=src; - else dst->parent->last_child=src; - src->next_sibling=dst->next_sibling; - dst->next_sibling=src; - src->prev_sibling=dst; - src->parent=dst->parent; - return src; - } - -template -template iter tree::move_before(iter target, iter source) - { - tree_node *dst=target.node; - tree_node *src=source.node; - assert(dst); - assert(src); - - if(dst==src) return source; - if(dst->prev_sibling) - if(dst->prev_sibling==src) // already in the right spot - return source; - - // take src out of the tree - if(src->prev_sibling!=0) src->prev_sibling->next_sibling=src->next_sibling; - else 
src->parent->first_child=src->next_sibling; - if(src->next_sibling!=0) src->next_sibling->prev_sibling=src->prev_sibling; - else src->parent->last_child=src->prev_sibling; - - // connect it to the new point - if(dst->prev_sibling!=0) dst->prev_sibling->next_sibling=src; - else dst->parent->first_child=src; - src->prev_sibling=dst->prev_sibling; - dst->prev_sibling=src; - src->next_sibling=dst; - src->parent=dst->parent; - return src; - } - -// specialisation for sibling_iterators -template -typename tree::sibling_iterator tree::move_before(sibling_iterator target, - sibling_iterator source) - { - tree_node *dst=target.node; - tree_node *src=source.node; - tree_node *dst_prev_sibling; - if(dst==0) { // must then be an end iterator - dst_prev_sibling=target.parent_->last_child; - assert(dst_prev_sibling); - } - else dst_prev_sibling=dst->prev_sibling; - assert(src); - - if(dst==src) return source; - if(dst_prev_sibling) - if(dst_prev_sibling==src) // already in the right spot - return source; - - // take src out of the tree - if(src->prev_sibling!=0) src->prev_sibling->next_sibling=src->next_sibling; - else src->parent->first_child=src->next_sibling; - if(src->next_sibling!=0) src->next_sibling->prev_sibling=src->prev_sibling; - else src->parent->last_child=src->prev_sibling; - - // connect it to the new point - if(dst_prev_sibling!=0) dst_prev_sibling->next_sibling=src; - else target.parent_->first_child=src; - src->prev_sibling=dst_prev_sibling; - if(dst) { - dst->prev_sibling=src; - src->parent=dst->parent; - } - src->next_sibling=dst; - return src; - } - -template -template iter tree::move_ontop(iter target, iter source) - { - tree_node *dst=target.node; - tree_node *src=source.node; - assert(dst); - assert(src); - - if(dst==src) return source; - - // remember connection points - tree_node *b_prev_sibling=dst->prev_sibling; - tree_node *b_next_sibling=dst->next_sibling; - tree_node *b_parent=dst->parent; - - // remove target - erase(target); - - // take src out 
of the tree - if(src->prev_sibling!=0) src->prev_sibling->next_sibling=src->next_sibling; - else src->parent->first_child=src->next_sibling; - if(src->next_sibling!=0) src->next_sibling->prev_sibling=src->prev_sibling; - else src->parent->last_child=src->prev_sibling; - - // connect it to the new point - if(b_prev_sibling!=0) b_prev_sibling->next_sibling=src; - else b_parent->first_child=src; - if(b_next_sibling!=0) b_next_sibling->prev_sibling=src; - else b_parent->last_child=src; - src->prev_sibling=b_prev_sibling; - src->next_sibling=b_next_sibling; - src->parent=b_parent; - return src; - } - -template -void tree::merge(sibling_iterator to1, sibling_iterator to2, - sibling_iterator from1, sibling_iterator from2, - bool duplicate_leaves) - { - sibling_iterator fnd; - while(from1!=from2) { - if((fnd=std::find(to1, to2, (*from1))) != to2) { // element found - if(from1.begin()==from1.end()) { // full depth reached - if(duplicate_leaves) - append_child(parent(to1), (*from1)); - } - else { // descend further - merge(fnd.begin(), fnd.end(), from1.begin(), from1.end(), duplicate_leaves); - } - } - else { // element missing - insert_subtree(to2, from1); - } - ++from1; - } - } - - -template -void tree::sort(sibling_iterator from, sibling_iterator to, bool deep) - { - std::less comp; - sort(from, to, comp, deep); - } - -template -template -void tree::sort(sibling_iterator from, sibling_iterator to, - StrictWeakOrdering comp, bool deep) - { - if(from==to) return; - // make list of sorted nodes - // CHECK: if multiset stores equivalent nodes in the order in which they - // are inserted, then this routine should be called 'stable_sort'. 
- std::multiset > nodes(comp); - sibling_iterator it=from, it2=to; - while(it != to) { - nodes.insert(it.node); - ++it; - } - // reassemble - --it2; - - // prev and next are the nodes before and after the sorted range - tree_node *prev=from.node->prev_sibling; - tree_node *next=it2.node->next_sibling; - typename std::multiset >::iterator nit=nodes.begin(), eit=nodes.end(); - if(prev==0) { - if((*nit)->parent!=0) // to catch "sorting the head" situations, when there is no parent - (*nit)->parent->first_child=(*nit); - } - else prev->next_sibling=(*nit); - - --eit; - while(nit!=eit) { - (*nit)->prev_sibling=prev; - if(prev) - prev->next_sibling=(*nit); - prev=(*nit); - ++nit; - } - // prev now points to the last-but-one node in the sorted range - if(prev) - prev->next_sibling=(*eit); - - // eit points to the last node in the sorted range. - (*eit)->next_sibling=next; - (*eit)->prev_sibling=prev; // missed in the loop above - if(next==0) { - if((*eit)->parent!=0) // to catch "sorting the head" situations, when there is no parent - (*eit)->parent->last_child=(*eit); - } - else next->prev_sibling=(*eit); - - if(deep) { // sort the children of each node too - sibling_iterator bcs(*nodes.begin()); - sibling_iterator ecs(*eit); - ++ecs; - while(bcs!=ecs) { - sort(begin(bcs), end(bcs), comp, deep); - ++bcs; - } - } - } - -template -template -bool tree::equal(const iter& one_, const iter& two, const iter& three_) const - { - std::equal_to comp; - return equal(one_, two, three_, comp); - } - -template -template -bool tree::equal_subtree(const iter& one_, const iter& two_) const - { - std::equal_to comp; - return equal_subtree(one_, two_, comp); - } - -template -template -bool tree::equal(const iter& one_, const iter& two, const iter& three_, BinaryPredicate fun) const - { - pre_order_iterator one(one_), three(three_); - -// if(one==two && is_valid(three) && three.number_of_children()!=0) -// return false; - while(one!=two && is_valid(three)) { - if(!fun(*one,*three)) - return 
false; - if(one.number_of_children()!=three.number_of_children()) - return false; - ++one; - ++three; - } - return true; - } - -template -template -bool tree::equal_subtree(const iter& one_, const iter& two_, BinaryPredicate fun) const - { - pre_order_iterator one(one_), two(two_); - - if(!fun(*one,*two)) return false; - if(number_of_children(one)!=number_of_children(two)) return false; - return equal(begin(one),end(one),begin(two),fun); - } - -template -tree tree::subtree(sibling_iterator from, sibling_iterator to) const - { - tree tmp; - tmp.set_head(value_type()); - tmp.replace(tmp.begin(), tmp.end(), from, to); - return tmp; - } - -template -void tree::subtree(tree& tmp, sibling_iterator from, sibling_iterator to) const - { - tmp.set_head(value_type()); - tmp.replace(tmp.begin(), tmp.end(), from, to); - } - -template -int tree::size() const - { - int i=0; - pre_order_iterator it=begin(), eit=end(); - while(it!=eit) { - ++i; - ++it; - } - return i; - } - -template -int tree::size(const iterator_base& top) const - { - int i=0; - pre_order_iterator it=top, eit=top; - eit.skip_children(); - ++eit; - while(it!=eit) { - ++i; - ++it; - } - return i; - } - -template -bool tree::empty() const - { - pre_order_iterator it=begin(), eit=end(); - return (it==eit); - } - -template -int tree::depth(const iterator_base& it) const - { - tree_node* pos=it.node; - assert(pos!=0); - int ret=0; - while(pos->parent!=0) { - pos=pos->parent; - ++ret; - } - return ret; - } - -template -int tree::max_depth() const - { - return max_depth(begin()); - } - - -template -int tree::max_depth(const iterator_base& pos) const - { - tree_node *tmp=pos.node; - int curdepth=0, maxdepth=0; - while(true) { // try to walk the bottom of the tree - while(tmp->first_child==0) { - if(tmp==pos.node) return maxdepth; - if(tmp->next_sibling==0) { - // try to walk up and then right again - do { - tmp=tmp->parent; - if(tmp==0) return maxdepth; - --curdepth; - } while(tmp->next_sibling==0); - } - 
if(tmp==pos.node) return maxdepth; - tmp=tmp->next_sibling; - } - tmp=tmp->first_child; - ++curdepth; - maxdepth=std::max(curdepth, maxdepth); - } - } - -template -unsigned int tree::number_of_children(const iterator_base& it) - { - tree_node *pos=it.node->first_child; - if(pos==0) return 0; - - unsigned int ret=1; -// while(pos!=it.node->last_child) { -// ++ret; -// pos=pos->next_sibling; -// } - while((pos=pos->next_sibling)) - ++ret; - return ret; - } - -template -unsigned int tree::number_of_siblings(const iterator_base& it) const - { - tree_node *pos=it.node; - unsigned int ret=0; - // count forward - while(pos->next_sibling && - pos->next_sibling!=head && - pos->next_sibling!=feet) { - ++ret; - pos=pos->next_sibling; - } - // count backward - pos=it.node; - while(pos->prev_sibling && - pos->prev_sibling!=head && - pos->prev_sibling!=feet) { - ++ret; - pos=pos->prev_sibling; - } - - return ret; - } - -template -void tree::swap(sibling_iterator it) - { - tree_node *nxt=it.node->next_sibling; - if(nxt) { - if(it.node->prev_sibling) - it.node->prev_sibling->next_sibling=nxt; - else - it.node->parent->first_child=nxt; - nxt->prev_sibling=it.node->prev_sibling; - tree_node *nxtnxt=nxt->next_sibling; - if(nxtnxt) - nxtnxt->prev_sibling=it.node; - else - it.node->parent->last_child=it.node; - nxt->next_sibling=it.node; - it.node->prev_sibling=nxt; - it.node->next_sibling=nxtnxt; - } - } - -template -void tree::swap(iterator one, iterator two) - { - // if one and two are adjacent siblings, use the sibling swap - if(one.node->next_sibling==two.node) swap(one); - else if(two.node->next_sibling==one.node) swap(two); - else { - tree_node *nxt1=one.node->next_sibling; - tree_node *nxt2=two.node->next_sibling; - tree_node *pre1=one.node->prev_sibling; - tree_node *pre2=two.node->prev_sibling; - tree_node *par1=one.node->parent; - tree_node *par2=two.node->parent; - - // reconnect - one.node->parent=par2; - one.node->next_sibling=nxt2; - if(nxt2) nxt2->prev_sibling=one.node; 
- else par2->last_child=one.node; - one.node->prev_sibling=pre2; - if(pre2) pre2->next_sibling=one.node; - else par2->first_child=one.node; - - two.node->parent=par1; - two.node->next_sibling=nxt1; - if(nxt1) nxt1->prev_sibling=two.node; - else par1->last_child=two.node; - two.node->prev_sibling=pre1; - if(pre1) pre1->next_sibling=two.node; - else par1->first_child=two.node; - } - } - -// template -// tree::iterator tree::find_subtree( -// sibling_iterator subfrom, sibling_iterator subto, iterator from, iterator to, -// BinaryPredicate fun) const -// { -// assert(1==0); // this routine is not finished yet. -// while(from!=to) { -// if(fun(*subfrom, *from)) { -// -// } -// } -// return to; -// } - -template -bool tree::is_in_subtree(const iterator_base& it, const iterator_base& begin, - const iterator_base& end) const - { - // FIXME: this should be optimised. - pre_order_iterator tmp=begin; - while(tmp!=end) { - if(tmp==it) return true; - ++tmp; - } - return false; - } - -template -bool tree::is_valid(const iterator_base& it) const - { - if(it.node==0 || it.node==feet || it.node==head) return false; - else return true; - } - -template -unsigned int tree::index(sibling_iterator it) const - { - unsigned int ind=0; - if(it.node->parent==0) { - while(it.node->prev_sibling!=head) { - it.node=it.node->prev_sibling; - ++ind; - } - } - else { - while(it.node->prev_sibling!=0) { - it.node=it.node->prev_sibling; - ++ind; - } - } - return ind; - } - - -template -typename tree::sibling_iterator tree::child(const iterator_base& it, unsigned int num) const - { - tree_node *tmp=it.node->first_child; - while(num--) { - assert(tmp!=0); - tmp=tmp->next_sibling; - } - return tmp; - } - - - - -// Iterator base - -template -tree::iterator_base::iterator_base() - : node(0), skip_current_children_(false) - { - } - -template -tree::iterator_base::iterator_base(tree_node *tn) - : node(tn), skip_current_children_(false) - { - } - -template -T& tree::iterator_base::operator*() const - { - 
return node->data; - } - -template -T* tree::iterator_base::operator->() const - { - return &(node->data); - } - -template -bool tree::post_order_iterator::operator!=(const post_order_iterator& other) const - { - if(other.node!=this->node) return true; - else return false; - } - -template -bool tree::post_order_iterator::operator==(const post_order_iterator& other) const - { - if(other.node==this->node) return true; - else return false; - } - -template -bool tree::pre_order_iterator::operator!=(const pre_order_iterator& other) const - { - if(other.node!=this->node) return true; - else return false; - } - -template -bool tree::pre_order_iterator::operator==(const pre_order_iterator& other) const - { - if(other.node==this->node) return true; - else return false; - } - -template -bool tree::sibling_iterator::operator!=(const sibling_iterator& other) const - { - if(other.node!=this->node) return true; - else return false; - } - -template -bool tree::sibling_iterator::operator==(const sibling_iterator& other) const - { - if(other.node==this->node) return true; - else return false; - } - -template -bool tree::leaf_iterator::operator!=(const leaf_iterator& other) const - { - if(other.node!=this->node) return true; - else return false; - } - -template -bool tree::leaf_iterator::operator==(const leaf_iterator& other) const - { - if(other.node==this->node && other.top==this->top) return true; - else return false; - } - -template -typename tree::sibling_iterator tree::iterator_base::begin() const - { - if(node->first_child==0) - return end(); - - sibling_iterator ret(node->first_child); - ret.parent_=this->node; - return ret; - } - -template -typename tree::sibling_iterator tree::iterator_base::end() const - { - sibling_iterator ret(0); - ret.parent_=node; - return ret; - } - -template -void tree::iterator_base::skip_children() - { - skip_current_children_=true; - } - -template -unsigned int tree::iterator_base::number_of_children() const - { - tree_node 
*pos=node->first_child; - if(pos==0) return 0; - - unsigned int ret=1; - while(pos!=node->last_child) { - ++ret; - pos=pos->next_sibling; - } - return ret; - } - - - -// Pre-order iterator - -template -tree::pre_order_iterator::pre_order_iterator() - : iterator_base(0) - { - } - -template -tree::pre_order_iterator::pre_order_iterator(tree_node *tn) - : iterator_base(tn) - { - } - -template -tree::pre_order_iterator::pre_order_iterator(const iterator_base &other) - : iterator_base(other.node) - { - } - -template -tree::pre_order_iterator::pre_order_iterator(const sibling_iterator& other) - : iterator_base(other.node) - { - if(this->node==0) { - if(other.range_last()!=0) - this->node=other.range_last(); - else - this->node=other.parent_; - this->skip_children(); - ++(*this); - } - } - -template -typename tree::pre_order_iterator& tree::pre_order_iterator::operator++() - { - assert(this->node!=0); - if(!this->skip_current_children_ && this->node->first_child != 0) { - this->node=this->node->first_child; - } - else { - this->skip_current_children_=false; - while(this->node->next_sibling==0) { - this->node=this->node->parent; - if(this->node==0) - return *this; - } - this->node=this->node->next_sibling; - } - return *this; - } - -template -typename tree::pre_order_iterator& tree::pre_order_iterator::operator--() - { - assert(this->node!=0); - if(this->node->prev_sibling) { - this->node=this->node->prev_sibling; - while(this->node->last_child) - this->node=this->node->last_child; - } - else { - this->node=this->node->parent; - if(this->node==0) - return *this; - } - return *this; -} - -template -typename tree::pre_order_iterator tree::pre_order_iterator::operator++(int n) - { - pre_order_iterator copy = *this; - ++(*this); - return copy; - } - -template -typename tree::pre_order_iterator tree::pre_order_iterator::operator--(int n) -{ - pre_order_iterator copy = *this; - --(*this); - return copy; -} - -template -typename tree::pre_order_iterator& 
tree::pre_order_iterator::operator+=(unsigned int num) - { - while(num>0) { - ++(*this); - --num; - } - return (*this); - } - -template -typename tree::pre_order_iterator& tree::pre_order_iterator::operator-=(unsigned int num) - { - while(num>0) { - --(*this); - --num; - } - return (*this); - } - - - -// Post-order iterator - -template -tree::post_order_iterator::post_order_iterator() - : iterator_base(0) - { - } - -template -tree::post_order_iterator::post_order_iterator(tree_node *tn) - : iterator_base(tn) - { - } - -template -tree::post_order_iterator::post_order_iterator(const iterator_base &other) - : iterator_base(other.node) - { - } - -template -tree::post_order_iterator::post_order_iterator(const sibling_iterator& other) - : iterator_base(other.node) - { - if(this->node==0) { - if(other.range_last()!=0) - this->node=other.range_last(); - else - this->node=other.parent_; - this->skip_children(); - ++(*this); - } - } - -template -typename tree::post_order_iterator& tree::post_order_iterator::operator++() - { - assert(this->node!=0); - if(this->node->next_sibling==0) { - this->node=this->node->parent; - this->skip_current_children_=false; - } - else { - this->node=this->node->next_sibling; - if(this->skip_current_children_) { - this->skip_current_children_=false; - } - else { - while(this->node->first_child) - this->node=this->node->first_child; - } - } - return *this; - } - -template -typename tree::post_order_iterator& tree::post_order_iterator::operator--() - { - assert(this->node!=0); - if(this->skip_current_children_ || this->node->last_child==0) { - this->skip_current_children_=false; - while(this->node->prev_sibling==0) - this->node=this->node->parent; - this->node=this->node->prev_sibling; - } - else { - this->node=this->node->last_child; - } - return *this; - } - -template -typename tree::post_order_iterator tree::post_order_iterator::operator++(int) - { - post_order_iterator copy = *this; - ++(*this); - return copy; - } - -template -typename 
tree::post_order_iterator tree::post_order_iterator::operator--(int) - { - post_order_iterator copy = *this; - --(*this); - return copy; - } - - -template -typename tree::post_order_iterator& tree::post_order_iterator::operator+=(unsigned int num) - { - while(num>0) { - ++(*this); - --num; - } - return (*this); - } - -template -typename tree::post_order_iterator& tree::post_order_iterator::operator-=(unsigned int num) - { - while(num>0) { - --(*this); - --num; - } - return (*this); - } - -template -void tree::post_order_iterator::descend_all() - { - assert(this->node!=0); - while(this->node->first_child) - this->node=this->node->first_child; - } - - -// Breadth-first iterator - -template -tree::breadth_first_queued_iterator::breadth_first_queued_iterator() - : iterator_base() - { - } - -template -tree::breadth_first_queued_iterator::breadth_first_queued_iterator(tree_node *tn) - : iterator_base(tn) - { - traversal_queue.push(tn); - } - -template -tree::breadth_first_queued_iterator::breadth_first_queued_iterator(const iterator_base& other) - : iterator_base(other.node) - { - traversal_queue.push(other.node); - } - -template -bool tree::breadth_first_queued_iterator::operator!=(const breadth_first_queued_iterator& other) const - { - if(other.node!=this->node) return true; - else return false; - } - -template -bool tree::breadth_first_queued_iterator::operator==(const breadth_first_queued_iterator& other) const - { - if(other.node==this->node) return true; - else return false; - } - -template -typename tree::breadth_first_queued_iterator& tree::breadth_first_queued_iterator::operator++() - { - assert(this->node!=0); - - // Add child nodes and pop current node - sibling_iterator sib=this->begin(); - while(sib!=this->end()) { - traversal_queue.push(sib.node); - ++sib; - } - traversal_queue.pop(); - if(traversal_queue.size()>0) - this->node=traversal_queue.front(); - else - this->node=0; - return (*this); - } - -template -typename tree::breadth_first_queued_iterator 
tree::breadth_first_queued_iterator::operator++(int n) - { - breadth_first_queued_iterator copy = *this; - ++(*this); - return copy; - } - -template -typename tree::breadth_first_queued_iterator& tree::breadth_first_queued_iterator::operator+=(unsigned int num) - { - while(num>0) { - ++(*this); - --num; - } - return (*this); - } - - - -// Fixed depth iterator - -template -tree::fixed_depth_iterator::fixed_depth_iterator() - : iterator_base() - { - set_first_parent_(); - } - -template -tree::fixed_depth_iterator::fixed_depth_iterator(tree_node *tn) - : iterator_base(tn) - { - set_first_parent_(); - } - -template -tree::fixed_depth_iterator::fixed_depth_iterator(const iterator_base& other) - : iterator_base(other.node) - { - set_first_parent_(); - } - -template -tree::fixed_depth_iterator::fixed_depth_iterator(const sibling_iterator& other) - : iterator_base(other.node), first_parent_(other.parent_) - { - find_leftmost_parent_(); - } - -template -tree::fixed_depth_iterator::fixed_depth_iterator(const fixed_depth_iterator& other) - : iterator_base(other.node), first_parent_(other.first_parent_) - { - } - -template -bool tree::fixed_depth_iterator::operator==(const fixed_depth_iterator& other) const - { - if(other.node==this->node && other.first_parent_==first_parent_) return true; - else return false; - } - -template -bool tree::fixed_depth_iterator::operator!=(const fixed_depth_iterator& other) const - { - if(other.node!=this->node || other.first_parent_!=first_parent_) return true; - else return false; - } - -template -void tree::fixed_depth_iterator::set_first_parent_() - { - return; // FIXME: we do not use first_parent_ yet, and it actually needs some serious reworking if - // it is ever to work at the 'head' level. 
- first_parent_=0; - if(this->node==0) return; - if(this->node->parent!=0) - first_parent_=this->node->parent; - if(first_parent_) - find_leftmost_parent_(); - } - -template -void tree::fixed_depth_iterator::find_leftmost_parent_() - { - return; // FIXME: see 'set_first_parent()' - tree_node *tmppar=first_parent_; - while(tmppar->prev_sibling) { - tmppar=tmppar->prev_sibling; - if(tmppar->first_child) - first_parent_=tmppar; - } - } - -template -typename tree::fixed_depth_iterator& tree::fixed_depth_iterator::operator++() - { - assert(this->node!=0); - - if(this->node->next_sibling) { - this->node=this->node->next_sibling; - } - else { - int relative_depth=0; - upper: - do { - this->node=this->node->parent; - if(this->node==0) return *this; - --relative_depth; - } while(this->node->next_sibling==0); - lower: - this->node=this->node->next_sibling; - while(this->node->first_child==0) { - if(this->node->next_sibling==0) - goto upper; - this->node=this->node->next_sibling; - if(this->node==0) return *this; - } - while(relative_depth<0 && this->node->first_child!=0) { - this->node=this->node->first_child; - ++relative_depth; - } - if(relative_depth<0) { - if(this->node->next_sibling==0) goto upper; - else goto lower; - } - } - return *this; - -// if(this->node->next_sibling!=0) { -// this->node=this->node->next_sibling; -// assert(this->node!=0); -// if(this->node->parent==0 && this->node->next_sibling==0) // feet element -// this->node=0; -// } -// else { -// tree_node *par=this->node->parent; -// do { -// par=par->next_sibling; -// if(par==0) { // FIXME: need to keep track of this! 
-// this->node=0; -// return *this; -// } -// } while(par->first_child==0); -// this->node=par->first_child; -// } - return *this; - } - -template -typename tree::fixed_depth_iterator& tree::fixed_depth_iterator::operator--() - { - assert(this->node!=0); - if(this->node->prev_sibling!=0) { - this->node=this->node->prev_sibling; - assert(this->node!=0); - if(this->node->parent==0 && this->node->prev_sibling==0) // head element - this->node=0; - } - else { - tree_node *par=this->node->parent; - do { - par=par->prev_sibling; - if(par==0) { // FIXME: need to keep track of this! - this->node=0; - return *this; - } - } while(par->last_child==0); - this->node=par->last_child; - } - return *this; -} - -template -typename tree::fixed_depth_iterator tree::fixed_depth_iterator::operator++(int) - { - fixed_depth_iterator copy = *this; - ++(*this); - return copy; - } - -template -typename tree::fixed_depth_iterator tree::fixed_depth_iterator::operator--(int) -{ - fixed_depth_iterator copy = *this; - --(*this); - return copy; -} - -template -typename tree::fixed_depth_iterator& tree::fixed_depth_iterator::operator-=(unsigned int num) - { - while(num>0) { - --(*this); - --(num); - } - return (*this); - } - -template -typename tree::fixed_depth_iterator& tree::fixed_depth_iterator::operator+=(unsigned int num) - { - while(num>0) { - ++(*this); - --(num); - } - return *this; - } - -// FIXME: add the other members of fixed_depth_iterator. 
- - -// Sibling iterator - -template -tree::sibling_iterator::sibling_iterator() - : iterator_base() - { - set_parent_(); - } - -template -tree::sibling_iterator::sibling_iterator(tree_node *tn) - : iterator_base(tn) - { - set_parent_(); - } - -template -tree::sibling_iterator::sibling_iterator(const iterator_base& other) - : iterator_base(other.node) - { - set_parent_(); - } - -template -tree::sibling_iterator::sibling_iterator(const sibling_iterator& other) - : iterator_base(other), parent_(other.parent_) - { - } - -template -void tree::sibling_iterator::set_parent_() - { - parent_=0; - if(this->node==0) return; - if(this->node->parent!=0) - parent_=this->node->parent; - } - -template -typename tree::sibling_iterator& tree::sibling_iterator::operator++() - { - if(this->node) - this->node=this->node->next_sibling; - return *this; - } - -template -typename tree::sibling_iterator& tree::sibling_iterator::operator--() - { - if(this->node) this->node=this->node->prev_sibling; - else { - assert(parent_); - this->node=parent_->last_child; - } - return *this; -} - -template -typename tree::sibling_iterator tree::sibling_iterator::operator++(int) - { - sibling_iterator copy = *this; - ++(*this); - return copy; - } - -template -typename tree::sibling_iterator tree::sibling_iterator::operator--(int) - { - sibling_iterator copy = *this; - --(*this); - return copy; - } - -template -typename tree::sibling_iterator& tree::sibling_iterator::operator+=(unsigned int num) - { - while(num>0) { - ++(*this); - --num; - } - return (*this); - } - -template -typename tree::sibling_iterator& tree::sibling_iterator::operator-=(unsigned int num) - { - while(num>0) { - --(*this); - --num; - } - return (*this); - } - -template -typename tree::tree_node *tree::sibling_iterator::range_first() const - { - tree_node *tmp=parent_->first_child; - return tmp; - } - -template -typename tree::tree_node *tree::sibling_iterator::range_last() const - { - return parent_->last_child; - } - -// Leaf 
iterator - -template -tree::leaf_iterator::leaf_iterator() - : iterator_base(0), top_node(0) - { - } - -template -tree::leaf_iterator::leaf_iterator(tree_node *tn, tree_node *top) - : iterator_base(tn), top_node(top) - { - } - -template -tree::leaf_iterator::leaf_iterator(const iterator_base &other) - : iterator_base(other.node), top_node(0) - { - } - -template -tree::leaf_iterator::leaf_iterator(const sibling_iterator& other) - : iterator_base(other.node), top_node(0) - { - if(this->node==0) { - if(other.range_last()!=0) - this->node=other.range_last(); - else - this->node=other.parent_; - ++(*this); - } - } - -template -typename tree::leaf_iterator& tree::leaf_iterator::operator++() - { - assert(this->node!=0); - while(this->node->next_sibling==0) { - if (this->node->parent==0) return *this; - this->node=this->node->parent; - if (top_node != 0 && this->node==top_node) return *this; - } - this->node=this->node->next_sibling; - while(this->node->first_child) - this->node=this->node->first_child; - return *this; - } - -template -typename tree::leaf_iterator& tree::leaf_iterator::operator--() - { - assert(this->node!=0); - while (this->node->prev_sibling==0) { - if (this->node->parent==0) return *this; - this->node=this->node->parent; - if (top_node !=0 && this->node==top_node) return *this; - } - this->node=this->node->prev_sibling; - while(this->node->last_child) - this->node=this->node->last_child; - return *this; - } - -template -typename tree::leaf_iterator tree::leaf_iterator::operator++(int) - { - leaf_iterator copy = *this; - ++(*this); - return copy; - } - -template -typename tree::leaf_iterator tree::leaf_iterator::operator--(int) - { - leaf_iterator copy = *this; - --(*this); - return copy; - } - - -template -typename tree::leaf_iterator& tree::leaf_iterator::operator+=(unsigned int num) - { - while(num>0) { - ++(*this); - --num; - } - return (*this); - } - -template -typename tree::leaf_iterator& tree::leaf_iterator::operator-=(unsigned int num) - { - 
while(num>0) { - --(*this); - --num; - } - return (*this); - } - -#endif - -// Local variables: -// default-tab-width: 3 -// End: diff --git a/thirdparty/jsoncpp/pkg-config/jsoncpp.pc.in b/thirdparty/jsoncpp/pkg-config/jsoncpp.pc.in new file mode 100755 index 000000000..9613181b2 --- /dev/null +++ b/thirdparty/jsoncpp/pkg-config/jsoncpp.pc.in @@ -0,0 +1,11 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${exec_prefix}/@LIBRARY_INSTALL_DIR@ +includedir=${prefix}/@INCLUDE_INSTALL_DIR@ + +Name: jsoncpp +Description: A C++ library for interacting with JSON +Version: @JSONCPP_VERSION@ +URL: https://github.com/open-source-parsers/jsoncpp +Libs: -L${libdir} -ljsoncpp +Cflags: -I${includedir} From d73521a0095c03fef8cfcc04908001bfd709d970 Mon Sep 17 00:00:00 2001 From: liu946 Date: Sat, 3 Jun 2017 04:01:37 +0800 Subject: [PATCH 02/22] .gitignore srl exe output --- .gitignore | 2 +- src/console/CMakeLists.txt | 7 ++++--- src/ltp/Ltp.h | 5 +++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 95203610d..67d86c66f 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,7 @@ include/ lib/ bin/ tools/train/lgdpj -tools/train/lgsrl +tools/train/srl* tools/train/otcws tools/train/otpos tools/train/otner diff --git a/src/console/CMakeLists.txt b/src/console/CMakeLists.txt index 2d2a4286e..d7a4a1dff 100644 --- a/src/console/CMakeLists.txt +++ b/src/console/CMakeLists.txt @@ -3,7 +3,8 @@ include_directories (./ ${THIRDPARTY_DIR}/boost/include/ ${THIRDPARTY_DIR}/tinythreadpp ${THIRDPARTY_DIR}/maxent - ${THIRDPARTY_DIR}/tinyxml) + ${THIRDPARTY_DIR}/tinyxml + ${THIRDPARTY_DIR}/jsoncpp/include) set (ltp_test_SRC ltp_test.cpp ${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp) @@ -25,14 +26,14 @@ target_link_libraries (ltp_test segmentor_static_lib postagger_static_lib parser_static_lib - lstm_sdparser_static_lib ner_static_lib srl_static_lib xml4nlp boost_regex_static_lib boost_program_options_static_lib 
boost_serialization_static_lib - dynet) + dynet + jsoncpp_lib_static) add_executable (cws_cmdline cws_cmdline.cpp ${THIRDPARTY_DIR}/tinythreadpp/tinythread.cpp) diff --git a/src/ltp/Ltp.h b/src/ltp/Ltp.h index 48bc62b3f..d7b39dfcb 100644 --- a/src/ltp/Ltp.h +++ b/src/ltp/Ltp.h @@ -21,6 +21,11 @@ #define LTP_SERVICE_NAME_DEPPARSE "dp" #define LTP_SERVICE_NAME_SRL "srl" #define LTP_SERVICE_NAME_ALL "all" +#define LTP_SERVICE_NAME_DEFAULT LTP_SERVICE_NAME_ALL + +#define LTP_SERVICE_OUTPUT_FORMAT_XML "xml" +#define LTP_SERVICE_OUTPUT_FORMAT_JSON "json" +#define LTP_SERVICE_OUTPUT_FORMAT_DEFAULT LTP_SERVICE_OUTPUT_FORMAT_XML enum ErrorCodes { kEmptyStringError = 1, /*< The input sentence is empty */ From 5d7c9065a1091edc8087f13d20f3a5675e49762c Mon Sep 17 00:00:00 2001 From: liu946 Date: Sat, 3 Jun 2017 04:04:39 +0800 Subject: [PATCH 03/22] dynet --- thirdparty/CMakeLists.txt | 3 +- thirdparty/dynet/.travis.yml | 90 + thirdparty/dynet/CMakeLists.txt | 200 ++ thirdparty/dynet/LICENSE | 202 ++ thirdparty/dynet/MEM.notes | 10 + thirdparty/dynet/README.md | 18 + thirdparty/dynet/cmake/FindDyNet.cmake | 15 + thirdparty/dynet/cmake/FindEigen3.cmake | 90 + thirdparty/dynet/config.h.cmake | 8 + thirdparty/dynet/dynet/CMakeLists.txt | 161 + thirdparty/dynet/dynet/aligned-mem-pool.cc | 81 + thirdparty/dynet/dynet/aligned-mem-pool.h | 71 + thirdparty/dynet/dynet/c2w.h | 61 + thirdparty/dynet/dynet/cfsm-builder.cc | 234 ++ thirdparty/dynet/dynet/cfsm-builder.h | 110 + thirdparty/dynet/dynet/cuda.cc | 95 + thirdparty/dynet/dynet/cuda.h | 84 + thirdparty/dynet/dynet/cudnn-ops.cu | 224 ++ thirdparty/dynet/dynet/cudnn-ops.h | 66 + thirdparty/dynet/dynet/deep-lstm.cc | 165 + thirdparty/dynet/dynet/deep-lstm.h | 54 + thirdparty/dynet/dynet/devices.cc | 126 + thirdparty/dynet/dynet/devices.h | 80 + thirdparty/dynet/dynet/dict.cc | 48 + thirdparty/dynet/dynet/dict.h | 89 + thirdparty/dynet/dynet/dim.cc | 30 + thirdparty/dynet/dynet/dim.h | 323 ++ thirdparty/dynet/dynet/dynet-helper.h | 25 
+ thirdparty/dynet/dynet/dynet.cc | 353 ++ thirdparty/dynet/dynet/dynet.h | 633 ++++ thirdparty/dynet/dynet/except.h | 65 + thirdparty/dynet/dynet/exec.cc | 180 ++ thirdparty/dynet/dynet/exec.h | 48 + thirdparty/dynet/dynet/expr.cc | 191 ++ thirdparty/dynet/dynet/expr.h | 2006 ++++++++++++ thirdparty/dynet/dynet/fast-lstm.cc | 217 ++ thirdparty/dynet/dynet/fast-lstm.h | 68 + thirdparty/dynet/dynet/functors.h | 317 ++ thirdparty/dynet/dynet/globals.cc | 11 + thirdparty/dynet/dynet/globals.h | 17 + thirdparty/dynet/dynet/gpu-kernels.h | 57 + thirdparty/dynet/dynet/gpu-model.cu | 3 + thirdparty/dynet/dynet/gpu-nodes-contract.cu | 3 + thirdparty/dynet/dynet/gpu-nodes-conv.cu | 3 + thirdparty/dynet/dynet/gpu-nodes-conv2d.cu | 1 + thirdparty/dynet/dynet/gpu-nodes.cu | 3 + thirdparty/dynet/dynet/gpu-ops.cu | 105 + thirdparty/dynet/dynet/gpu-ops.h | 16 + thirdparty/dynet/dynet/gpu-param-nodes.cu | 3 + thirdparty/dynet/dynet/gpu-tensor.cu | 3 + thirdparty/dynet/dynet/gpu-training.cu | 3 + thirdparty/dynet/dynet/grad-check.cc | 100 + thirdparty/dynet/dynet/grad-check.h | 16 + thirdparty/dynet/dynet/graph.cc | 34 + thirdparty/dynet/dynet/graph.h | 9 + thirdparty/dynet/dynet/gru.cc | 158 + thirdparty/dynet/dynet/gru.h | 52 + thirdparty/dynet/dynet/hsm-builder.cc | 300 ++ thirdparty/dynet/dynet/hsm-builder.h | 88 + thirdparty/dynet/dynet/init.cc | 214 ++ thirdparty/dynet/dynet/init.h | 37 + thirdparty/dynet/dynet/io-macros.h | 165 + thirdparty/dynet/dynet/lstm.cc | 620 ++++ thirdparty/dynet/dynet/lstm.h | 319 ++ thirdparty/dynet/dynet/mem.cc | 90 + thirdparty/dynet/dynet/mem.h | 52 + thirdparty/dynet/dynet/model.cc | 740 +++++ thirdparty/dynet/dynet/model.h | 727 +++++ thirdparty/dynet/dynet/mp.cc | 88 + thirdparty/dynet/dynet/mp.h | 466 +++ thirdparty/dynet/dynet/nodes-common.cc | 1139 +++++++ thirdparty/dynet/dynet/nodes-contract.cc | 183 ++ thirdparty/dynet/dynet/nodes-contract.h | 32 + thirdparty/dynet/dynet/nodes-conv.cc | 453 +++ thirdparty/dynet/dynet/nodes-conv.h | 100 
+ thirdparty/dynet/dynet/nodes-conv2d.cc | 190 ++ thirdparty/dynet/dynet/nodes-macros.h | 114 + thirdparty/dynet/dynet/nodes.cc | 2826 +++++++++++++++++ thirdparty/dynet/dynet/nodes.h | 733 +++++ thirdparty/dynet/dynet/op-helper.h | 64 + thirdparty/dynet/dynet/param-nodes.cc | 275 ++ thirdparty/dynet/dynet/param-nodes.h | 92 + thirdparty/dynet/dynet/pretrain.cc | 63 + thirdparty/dynet/dynet/pretrain.h | 22 + thirdparty/dynet/dynet/rnn-state-machine.cc | 20 + thirdparty/dynet/dynet/rnn-state-machine.h | 47 + thirdparty/dynet/dynet/rnn.cc | 193 ++ thirdparty/dynet/dynet/rnn.h | 378 +++ thirdparty/dynet/dynet/saxe-init.cc | 27 + thirdparty/dynet/dynet/saxe-init.h | 12 + thirdparty/dynet/dynet/shadow-params.cc | 60 + thirdparty/dynet/dynet/shadow-params.h | 43 + thirdparty/dynet/dynet/simd-functors.h | 227 ++ thirdparty/dynet/dynet/tensor.cc | 438 +++ thirdparty/dynet/dynet/tensor.h | 742 +++++ thirdparty/dynet/dynet/timing.h | 22 + thirdparty/dynet/dynet/training.cc | 379 +++ thirdparty/dynet/dynet/training.h | 447 +++ thirdparty/dynet/dynet/treelstm.cc | 377 +++ thirdparty/dynet/dynet/treelstm.h | 122 + thirdparty/dynet/dynet/weight-decay.cc | 8 + thirdparty/dynet/dynet/weight-decay.h | 46 + .../eigen_backward_spatial_convolutions.h | 503 +++ .../third_party/eigen_spatial_convolutions.h | 1070 +++++++ 104 files changed, 22789 insertions(+), 2 deletions(-) create mode 100644 thirdparty/dynet/.travis.yml create mode 100644 thirdparty/dynet/CMakeLists.txt create mode 100644 thirdparty/dynet/LICENSE create mode 100644 thirdparty/dynet/MEM.notes create mode 100644 thirdparty/dynet/README.md create mode 100644 thirdparty/dynet/cmake/FindDyNet.cmake create mode 100644 thirdparty/dynet/cmake/FindEigen3.cmake create mode 100644 thirdparty/dynet/config.h.cmake create mode 100644 thirdparty/dynet/dynet/CMakeLists.txt create mode 100644 thirdparty/dynet/dynet/aligned-mem-pool.cc create mode 100644 thirdparty/dynet/dynet/aligned-mem-pool.h create mode 100644 
thirdparty/dynet/dynet/c2w.h create mode 100644 thirdparty/dynet/dynet/cfsm-builder.cc create mode 100644 thirdparty/dynet/dynet/cfsm-builder.h create mode 100644 thirdparty/dynet/dynet/cuda.cc create mode 100644 thirdparty/dynet/dynet/cuda.h create mode 100644 thirdparty/dynet/dynet/cudnn-ops.cu create mode 100644 thirdparty/dynet/dynet/cudnn-ops.h create mode 100644 thirdparty/dynet/dynet/deep-lstm.cc create mode 100644 thirdparty/dynet/dynet/deep-lstm.h create mode 100644 thirdparty/dynet/dynet/devices.cc create mode 100644 thirdparty/dynet/dynet/devices.h create mode 100644 thirdparty/dynet/dynet/dict.cc create mode 100644 thirdparty/dynet/dynet/dict.h create mode 100644 thirdparty/dynet/dynet/dim.cc create mode 100644 thirdparty/dynet/dynet/dim.h create mode 100644 thirdparty/dynet/dynet/dynet-helper.h create mode 100644 thirdparty/dynet/dynet/dynet.cc create mode 100644 thirdparty/dynet/dynet/dynet.h create mode 100644 thirdparty/dynet/dynet/except.h create mode 100644 thirdparty/dynet/dynet/exec.cc create mode 100644 thirdparty/dynet/dynet/exec.h create mode 100644 thirdparty/dynet/dynet/expr.cc create mode 100644 thirdparty/dynet/dynet/expr.h create mode 100644 thirdparty/dynet/dynet/fast-lstm.cc create mode 100644 thirdparty/dynet/dynet/fast-lstm.h create mode 100644 thirdparty/dynet/dynet/functors.h create mode 100644 thirdparty/dynet/dynet/globals.cc create mode 100644 thirdparty/dynet/dynet/globals.h create mode 100644 thirdparty/dynet/dynet/gpu-kernels.h create mode 100644 thirdparty/dynet/dynet/gpu-model.cu create mode 100644 thirdparty/dynet/dynet/gpu-nodes-contract.cu create mode 100644 thirdparty/dynet/dynet/gpu-nodes-conv.cu create mode 100644 thirdparty/dynet/dynet/gpu-nodes-conv2d.cu create mode 100644 thirdparty/dynet/dynet/gpu-nodes.cu create mode 100644 thirdparty/dynet/dynet/gpu-ops.cu create mode 100644 thirdparty/dynet/dynet/gpu-ops.h create mode 100644 thirdparty/dynet/dynet/gpu-param-nodes.cu create mode 100644 
thirdparty/dynet/dynet/gpu-tensor.cu create mode 100644 thirdparty/dynet/dynet/gpu-training.cu create mode 100644 thirdparty/dynet/dynet/grad-check.cc create mode 100644 thirdparty/dynet/dynet/grad-check.h create mode 100644 thirdparty/dynet/dynet/graph.cc create mode 100644 thirdparty/dynet/dynet/graph.h create mode 100644 thirdparty/dynet/dynet/gru.cc create mode 100644 thirdparty/dynet/dynet/gru.h create mode 100644 thirdparty/dynet/dynet/hsm-builder.cc create mode 100644 thirdparty/dynet/dynet/hsm-builder.h create mode 100644 thirdparty/dynet/dynet/init.cc create mode 100644 thirdparty/dynet/dynet/init.h create mode 100644 thirdparty/dynet/dynet/io-macros.h create mode 100644 thirdparty/dynet/dynet/lstm.cc create mode 100644 thirdparty/dynet/dynet/lstm.h create mode 100644 thirdparty/dynet/dynet/mem.cc create mode 100644 thirdparty/dynet/dynet/mem.h create mode 100644 thirdparty/dynet/dynet/model.cc create mode 100644 thirdparty/dynet/dynet/model.h create mode 100644 thirdparty/dynet/dynet/mp.cc create mode 100644 thirdparty/dynet/dynet/mp.h create mode 100644 thirdparty/dynet/dynet/nodes-common.cc create mode 100644 thirdparty/dynet/dynet/nodes-contract.cc create mode 100644 thirdparty/dynet/dynet/nodes-contract.h create mode 100644 thirdparty/dynet/dynet/nodes-conv.cc create mode 100644 thirdparty/dynet/dynet/nodes-conv.h create mode 100644 thirdparty/dynet/dynet/nodes-conv2d.cc create mode 100644 thirdparty/dynet/dynet/nodes-macros.h create mode 100644 thirdparty/dynet/dynet/nodes.cc create mode 100644 thirdparty/dynet/dynet/nodes.h create mode 100644 thirdparty/dynet/dynet/op-helper.h create mode 100644 thirdparty/dynet/dynet/param-nodes.cc create mode 100644 thirdparty/dynet/dynet/param-nodes.h create mode 100644 thirdparty/dynet/dynet/pretrain.cc create mode 100644 thirdparty/dynet/dynet/pretrain.h create mode 100644 thirdparty/dynet/dynet/rnn-state-machine.cc create mode 100644 thirdparty/dynet/dynet/rnn-state-machine.h create mode 100644 
thirdparty/dynet/dynet/rnn.cc create mode 100644 thirdparty/dynet/dynet/rnn.h create mode 100644 thirdparty/dynet/dynet/saxe-init.cc create mode 100644 thirdparty/dynet/dynet/saxe-init.h create mode 100644 thirdparty/dynet/dynet/shadow-params.cc create mode 100644 thirdparty/dynet/dynet/shadow-params.h create mode 100644 thirdparty/dynet/dynet/simd-functors.h create mode 100644 thirdparty/dynet/dynet/tensor.cc create mode 100644 thirdparty/dynet/dynet/tensor.h create mode 100644 thirdparty/dynet/dynet/timing.h create mode 100644 thirdparty/dynet/dynet/training.cc create mode 100644 thirdparty/dynet/dynet/training.h create mode 100644 thirdparty/dynet/dynet/treelstm.cc create mode 100644 thirdparty/dynet/dynet/treelstm.h create mode 100644 thirdparty/dynet/dynet/weight-decay.cc create mode 100644 thirdparty/dynet/dynet/weight-decay.h create mode 100644 thirdparty/dynet/third_party/eigen_backward_spatial_convolutions.h create mode 100644 thirdparty/dynet/third_party/eigen_spatial_convolutions.h diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 9b7189ea0..250466112 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory (maxent) add_subdirectory (tinyxml) #add_subdirectory (cnn) add_subdirectory (jsoncpp) +add_subdirectory (dynet) if (UNIX) add_subdirectory (gtest) @@ -10,5 +11,3 @@ endif() #include_directories(${CMAKE_CURRENT_SOURCE_DIR}/cnn) #include_directories(${CMAKE_CURRENT_SOURCE_DIR}/eigen-3.2.10) - -#set(WITH_EIGEN_BACKEND 1) diff --git a/thirdparty/dynet/.travis.yml b/thirdparty/dynet/.travis.yml new file mode 100644 index 000000000..cbbf37819 --- /dev/null +++ b/thirdparty/dynet/.travis.yml @@ -0,0 +1,90 @@ +language: cpp + +matrix: + include: + - os: linux + compiler: gcc + env: CXX=g++-4.8 CC=gcc-4.8 PYVER=3.4 PYNUM=3 PYTHON=python CONDA_PACKAGES="numpy cython" + dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - boost-latest + packages: + - gcc-4.8 + - g++-4.8 + 
- libboost-filesystem1.55-dev + - libboost-program-options1.55-dev + - libboost-serialization1.55-dev + - libboost-test1.55-dev + - libboost-regex1.55-dev + - os: linux + compiler: gcc + env: CXX=g++-4.8 CC=gcc-4.8 PYVER=2.7 PYNUM=2 PYTHON=python CONDA_PACKAGES="numpy cython" + dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - boost-latest + packages: + - gcc-4.8 + - g++-4.8 + - libboost-filesystem1.55-dev + - libboost-program-options1.55-dev + - libboost-serialization1.55-dev + - libboost-test1.55-dev + - libboost-regex1.55-dev + - os: osx + compiler: clang + env: PYTHON=python PYVER=3.4 PYNUM=3 CONDA_PACKAGES="numpy cython" + - os: osx + compiler: clang + env: PYTHON=python PYVER=2.7 PYNUM=2 CONDA_PACKAGES="numpy cython" + + +before_install: + # We do this conditionally because it saves us some downloading if the + # version is the same. + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then + wget https://repo.continuum.io/miniconda/Miniconda$PYNUM-latest-Linux-x86_64.sh -O miniconda.sh; + elif [[ "$TRAVIS_OS_NAME" == "osx" ]]; then + wget https://repo.continuum.io/miniconda/Miniconda$PYNUM-latest-MacOSX-x86_64.sh -O miniconda.sh; + fi + - bash miniconda.sh -b -p $HOME/miniconda + - export PATH="$HOME/miniconda/bin:$PATH" + - hash -r + - conda config --set always_yes yes --set changeps1 no + - conda update -q conda + # Useful for debugging any issues with conda + - conda info -a + +install: + - hg clone https://bitbucket.org/eigen/eigen/ -r 346ecdb + - cd eigen + - mkdir build && cd build + - cmake .. + - sudo make install + - cd ../.. + + +before_script: + - cd $TRAVIS_BUILD_DIR + - mkdir build + - cd build + - conda create -q -n dynet-environment python=$PYVER $CONDA_PACKAGES + - source activate dynet-environment + - cmake .. 
-DEIGEN3_INCLUDE_DIR=/usr/local/include/eigen3 -DPYTHON=`which python` + +after_failure: + - cat $TRAVIS_BUILD_DIR/build/CMakeFiles/CMakeError.log + +script: + - if [[ $TRAVIS_OS_NAME == "linux" ]]; then make -j$(nproc); fi + - if [[ $TRAVIS_OS_NAME == "osx" ]]; then make -j$(sysctl -n hw.ncpu); fi + - make test + - cd python + - python setup.py install --user + - cd ../../tests/python + - python test.py diff --git a/thirdparty/dynet/CMakeLists.txt b/thirdparty/dynet/CMakeLists.txt new file mode 100644 index 000000000..28f1d5609 --- /dev/null +++ b/thirdparty/dynet/CMakeLists.txt @@ -0,0 +1,200 @@ +project(dynet) +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) + +# DYNET uses Eigen which exploits modern CPU architectures. To get the +# best possible performance, the following are recommended: +# 1. use very recent versions of gcc or Clang to build +# 2. use very recent versions of Eigen (ideally the dev version) +# 3. try compiler options like -march=native or other architecture +# flags (the compiler does not always make the best configuration +# decisions without help) + +# NOTE: This seems to be causing problems with linking before using +# make install. It is allegedly preferred, but probably doesn't +# suit our model of not installing the library most of the time. 
+##set(CMAKE_MACOSX_RPATH 0) + +function(find_mkl) + set(MKL_ARCH intel64) + find_path(MKL_INCLUDE_DIR mkl.h + PATHS ${MKL_ROOT} ${MKL_ROOT}/include) + find_library(MKL_CORE_LIB NAMES mkl_intel_lp64 mkl_intel_thread mkl_core + PATHS ${MKL_ROOT} ${MKL_ROOT}/lib/${MKL_ARCH} + ${MKL_ROOT}/lib #OSX + DOC "MKL core library path") + + find_library(MKL_COMPILER_LIB NAMES iomp5 libiomp5md + PATHS ${MKL_ROOT} ${MKL_ROOT}/../compiler/lib/${MKL_ARCH} #Windows + ${MKL_ROOT}/../compilers_and_libraries/linux/lib/${MKL_ARCH}_lin #Linux + ${MKL_ROOT}/../compilers_and_libraries/mac/lib #OSX + DOC "MKL compiler lib (for threaded MKL)") + + if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_COMPILER_LIB) + get_filename_component(MKL_CORE_LIB_DIR ${MKL_CORE_LIB} DIRECTORY) + get_filename_component(MKL_COMPILER_LIB_DIR ${MKL_COMPILER_LIB} DIRECTORY) + get_filename_component(MKL_COMPILER_LIB_FILE ${MKL_COMPILER_LIB} NAME) + message(STATUS "Found MKL\n * include: ${MKL_INCLUDE_DIR},\n * core library dir: ${MKL_CORE_LIB_DIR},\n * compiler library: ${MKL_COMPILER_LIB}") + + # Due to a conflict with /MT and /MD, MSVC needs mkl_intel_lp64 linked last, or we can change individual + # projects to use /MT (mkl_intel_lp64 linked with /MT, default MSVC projects use /MD), or we can instead + # link to the DLL versions. For now I'm opting for this solution which seems to work with projects still + # at their default /MD. Linux build requires the mkl_intel_lp64 to be linked first. 
So...: + if(MSVC) + set(LIBS ${LIBS} mkl_intel_thread mkl_core mkl_intel_lp64 ${MKL_COMPILER_LIB_FILE} PARENT_SCOPE) + else() + set(LIBS ${LIBS} mkl_intel_lp64 mkl_intel_thread mkl_core ${MKL_COMPILER_LIB_FILE} PARENT_SCOPE) + endif() + include_directories(${MKL_INCLUDE_DIR}) + link_directories(${MKL_CORE_LIB_DIR} ${MKL_COMPILER_LIB_DIR}) + set(MKL_LINK_DIRS ${MKL_CORE_LIB_DIR} ${MKL_COMPILER_LIB_DIR} PARENT_SCOPE) # Keeping this for python build + else() + message(FATAL_ERROR "Failed to find MKL in path: ${MKL_ROOT} (Did you set MKL_ROOT properly?)") + endif() +endfunction() + +######## Cross-compiler, cross-platform options +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_FAST_MATH") +if (MKL OR MKL_ROOT) + find_mkl() # sets include/lib directories and sets ${LIBS} needed for linking + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_USE_MKL_ALL") +endif() + + +######## Platform-specific options +if(WIN32) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX") # Disable min/max macros in windef.h +endif() + +######## Compiler-specific options +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W1 /MP") # -Wall produces 20k warnings. 
Enable parallel compilation +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -funroll-loops -fno-finite-math-only -Wall -Wno-missing-braces -std=c++11 -Ofast -g -march=native") +endif() + +#enable_testing() + +include_directories(${CMAKE_CURRENT_SOURCE_DIR} + ${PROJECT_SOURCE_DIR}/external/easyloggingpp/src) + +function(find_cudnn) + set(CUDNN_ROOT /usr/local/cuda CACHE PATH "CUDNN root path") + find_path(CUDNN_INCLUDE_DIRS cudnn.h + HINTS ${CUDNN_ROOT} + ${CUDNN_ROOT}/include + DOC "CUDNN include path") + find_library(CUDNN_LIBRARIES NAMES libcudnn.so cudnn.lib + PATHS ${CUDNN_ROOT} + ${CUDNN_ROOT}/lib + ${CUDNN_ROOT}/lib64 + ${CUDNN_ROOT}/lib/x64 + DOC "CUDNN library path") + if(CUDNN_INCLUDE_DIRS AND CUDNN_LIBRARIES) + set(CUDNN_FOUND TRUE PARENT_SCOPE) + message(STATUS "Found CUDNN (include: ${CUDNN_INCLUDE_DIRS}, library: ${CUDNN_LIBRARIES})") + mark_as_advanced(CUDNN_INCLUDE_DIRS CUDNN_LIBRARIES) + else() + MESSAGE(STATUS "Failed to find CUDNN in path: ${CUDNN_ROOT} (Did you set CUDNN_ROOT properly?)") + endif() +endfunction() + +# look for Boost +##set(BOOST_ROOT ${THIRDPARTY_DIR}/boost) +##if(DEFINED BOOST_ROOT OR DEFINED BOOSTROOT OR DEFINED ENV{BOOST_ROOT} OR DEFINED ENV{BOOSTROOT}) +## set(Boost_NO_SYSTEM_PATHS ON) +## if(DEFINED ${Boost_INCLUDE_DIR}) +## get_filename_component(Boost_INCLUDE_DIR "${Boost_INCLUDE_DIR}" REALPATH BASE_DIR "${CMAKE_BINARY_DIR}") +## endif() +##endif() +##set(Boost_REALPATH ON) +##find_package(Boost COMPONENTS program_options regex serialization REQUIRED) + +set(Boost_INCLUDE_DIR ${THIRDPARTY_DIR}/boost/include) +set(Boost_LIBRARY_DIRS ${LIBRARY_OUTPUT_PATH}) +set(Boost_LIBRARIES + boost_regex_static_lib + boost_program_options_static_lib + boost_serialization_static_lib + ) +message("-- Boost dir is " ${Boost_INCLUDE_DIR}) +include_directories(${Boost_INCLUDE_DIR}) +if(MSVC) + # Boost does auto-linking when using a compiler like Microsoft Visual C++, we just need to help it find the libraries + 
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LIBPATH:${Boost_LIBRARY_DIRS}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LIBPATH:${Boost_LIBRARY_DIRS}") +else() + set(LIBS ${LIBS} ${Boost_LIBRARIES}) +endif() +# trouble shooting: +# if boost library cannot be found, in addition to install boost library +# check if environment variables are set +# +# to set boost root and its library root in environment variable, use +# for example +# echo "export BOOST_LIBRARYDIR=/usr/local/lib" >> ~/.bashrc +# echo "export BOOST_ROOT=/cygdrive/d/tools/boost_1_58_0/boost_1_58_0" >> ~/.bashrc +# then run source ~/.bashrc to have those environment variable effective immediately + +if(BACKEND) + message("-- BACKEND: ${BACKEND}") +else() + message("-- BACKEND not specified, defaulting to eigen.") + set(BACKEND "eigen") +endif() + +if(BACKEND MATCHES "^eigen$") + set(WITH_EIGEN_BACKEND 1) +elseif(BACKEND MATCHES "^cuda$") + set(WITH_CUDA_BACKEND 1) +else() + message(SEND_ERROR "BACKEND must be eigen or cuda") +endif() + +if (WITH_CUDA_BACKEND) + find_package(CUDA REQUIRED) + set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_ROOT}) + include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) + #list(APPEND CUDA_LIBRARIES /usr/lib64/libpthread.so) + MESSAGE("CUDA_LIBRARIES: ${CUDA_LIBRARIES}") + list(REMOVE_ITEM CUDA_LIBRARIES -lpthread) + set(LIBS ${LIBS} ${CUDA_LIBRARIES}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_HAS_CUDA_FP16 -DEIGEN_USE_GPU") + find_cudnn() + if(CUDNN_FOUND) + include_directories(SYSTEM ${CUDNN_INCLUDE_DIRS}) + list(APPEND CUDA_LIBRARIES ${CUDNN_LIBRARIES}) + message("-- Successfully include CUDNN flags") + else() + message("-- CUDNN not found, some dependent functionalities will be disabled") + endif() +endif() + +# look for Eigen +#get_filename_component(EIGEN3_INCLUDE_DIR "${EIGEN3_INCLUDE_DIR}" REALPATH BASE_DIR "${CMAKE_BINARY_DIR}") +#message("-- Eigen dir is " ${EIGEN3_INCLUDE_DIR}) +#find_package(Eigen3 REQUIRED) +set(EIGEN3_INCLUDE_DIR 
${THIRDPARTY_DIR}/eigen) +message("-- Eigen dir is " ${EIGEN3_INCLUDE_DIR}) +include_directories(${EIGEN3_INCLUDE_DIR}) + +FIND_PACKAGE(Threads REQUIRED) +set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +add_subdirectory(dynet) +#add_subdirectory(tests) +#add_subdirectory(examples) +#add_subdirectory(tutorial) +#add_subdirectory(python) + +option(INCLUDE_SWIG "INCLUDE_SWIG" OFF) +if(INCLUDE_SWIG) + message("-- Including SWIG") + add_subdirectory(contrib/swig) +endif(INCLUDE_SWIG) + +##enable_testing() diff --git a/thirdparty/dynet/LICENSE b/thirdparty/dynet/LICENSE new file mode 100644 index 000000000..28ab34023 --- /dev/null +++ b/thirdparty/dynet/LICENSE @@ -0,0 +1,202 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2015 Chris Dyer + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/thirdparty/dynet/MEM.notes b/thirdparty/dynet/MEM.notes new file mode 100644 index 000000000..80d197a35 --- /dev/null +++ b/thirdparty/dynet/MEM.notes @@ -0,0 +1,10 @@ +The code that computes the l2 norm of the gradient is going to need +scratch space on every device that DYNET is using that has a parameter. +1) devices should know whether they have parameters/gradients + +alignment code is hidden away. it's all hard coded, but it looks like +Intel at least is getting more forgiving about alignment problems so +we might not notice opportunities for speedups if something changes. +GPU memory is aligned mostly by CUDA + +the MP stuff needs to be tested by Austin. diff --git a/thirdparty/dynet/README.md b/thirdparty/dynet/README.md new file mode 100644 index 000000000..2f7802e09 --- /dev/null +++ b/thirdparty/dynet/README.md @@ -0,0 +1,18 @@ +# DyNet +The Dynamic Neural Network Toolkit + +DyNet (formerly known as [cnn](http://github.com/clab/cnn-v1)) is a neural network library developed by Carnegie Mellon University and many others. It is written in C++ (with bindings in Python) and is designed to be efficient when run on either CPU or GPU, and to work well with networks that have dynamic structures that change for every training instance. 
For example, these kinds of networks are particularly important in natural language processing tasks, and DyNet has been used to build state-of-the-art systems for [syntactic parsing](https://github.com/clab/lstm-parser), [machine translation](https://github.com/neubig/lamtram), [morphological inflection](https://github.com/mfaruqui/morph-trans), and many other application areas. + +Read the [documentation](http://dynet.readthedocs.io/en/latest/) to get started, and feel free to contact the [dynet-users group](https://groups.google.com/forum/#!forum/dynet-users) with any questions (if you want to receive email make sure to select "all email" when you sign up). We greatly appreciate any bug reports and contributions, which can be made by filing an issue or making a pull request through the [github page](http://github.com/clab/dynet). + +You can also read more technical details in our [technical report](https://arxiv.org/abs/1701.03980). If you use DyNet for research, please cite this report as follows: + + @article{dynet, + title={DyNet: The Dynamic Neural Network Toolkit}, + author={Graham Neubig and Chris Dyer and Yoav Goldberg and Austin Matthews and Waleed Ammar and Antonios Anastasopoulos and Miguel Ballesteros and David Chiang and Daniel Clothiaux and Trevor Cohn and Kevin Duh and Manaal Faruqui and Cynthia Gan and Dan Garrette and Yangfeng Ji and Lingpeng Kong and Adhiguna Kuncoro and Gaurav Kumar and Chaitanya Malaviya and Paul Michel and Yusuke Oda and Matthew Richardson and Naomi Saphra and Swabha Swayamdipta and Pengcheng Yin}, + journal={arXiv preprint arXiv:1701.03980}, + year={2017} + } + +[![Build Status](https://travis-ci.org/clab/dynet.svg?branch=master)](https://travis-ci.org/clab/dynet) +[![Doc build Status](https://readthedocs.org/projects/dynet/badge/?version=latest)](http://dynet.readthedocs.io/en/latest/) diff --git a/thirdparty/dynet/cmake/FindDyNet.cmake b/thirdparty/dynet/cmake/FindDyNet.cmake new file mode 100644 index 
000000000..8a48bc10d --- /dev/null +++ b/thirdparty/dynet/cmake/FindDyNet.cmake @@ -0,0 +1,15 @@ + +CMAKE_MINIMUM_REQUIRED(VERSION 2.8.7 FATAL_ERROR) + +INCLUDE(FindPackageHandleStandardArgs) + +FIND_LIBRARY(TH_LIBRARY TH) +FIND_PATH(TH_INCLUDE_DIR "TH.h" PATHS "${CMAKE_PREFIX_PATH}/include/TH") + +SET(TH_LIBRARIES ${TH_LIBRARY}) + +FIND_PACKAGE_HANDLE_STANDARD_ARGS( + TH + REQUIRED_ARGS + TH_INCLUDE_DIR + TH_LIBRARY) diff --git a/thirdparty/dynet/cmake/FindEigen3.cmake b/thirdparty/dynet/cmake/FindEigen3.cmake new file mode 100644 index 000000000..cea1afeab --- /dev/null +++ b/thirdparty/dynet/cmake/FindEigen3.cmake @@ -0,0 +1,90 @@ +# - Try to find Eigen3 lib +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(Eigen3 3.1.2) +# to require version 3.1.2 or newer of Eigen3. +# +# Once done this will define +# +# EIGEN3_FOUND - system has eigen lib with correct version +# EIGEN3_INCLUDE_DIR - the eigen include directory +# EIGEN3_VERSION - eigen version +# +# This module reads hints about search locations from +# the following environment variables: +# +# EIGEN3_ROOT +# EIGEN3_ROOT_DIR + +# Copyright (c) 2006, 2007 Montel Laurent, +# Copyright (c) 2008, 2009 Gael Guennebaud, +# Copyright (c) 2009 Benoit Jacob +# Redistribution and use is allowed according to the terms of the 2-clause BSD license. 
+ +if(NOT Eigen3_FIND_VERSION) + if(NOT Eigen3_FIND_VERSION_MAJOR) + set(Eigen3_FIND_VERSION_MAJOR 2) + endif(NOT Eigen3_FIND_VERSION_MAJOR) + if(NOT Eigen3_FIND_VERSION_MINOR) + set(Eigen3_FIND_VERSION_MINOR 91) + endif(NOT Eigen3_FIND_VERSION_MINOR) + if(NOT Eigen3_FIND_VERSION_PATCH) + set(Eigen3_FIND_VERSION_PATCH 0) + endif(NOT Eigen3_FIND_VERSION_PATCH) + + set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") +endif(NOT Eigen3_FIND_VERSION) + +macro(_eigen3_check_version) + file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) + + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") + set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") + set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") + set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") + + set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) + if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK FALSE) + else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK TRUE) + endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + + if(NOT EIGEN3_VERSION_OK) + + message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " + "but at least version ${Eigen3_FIND_VERSION} is required") + endif(NOT EIGEN3_VERSION_OK) +endmacro(_eigen3_check_version) + +if (EIGEN3_INCLUDE_DIR) + + # in cache already + _eigen3_check_version() + set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) + +else (EIGEN3_INCLUDE_DIR) + + find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library + HINTS + ENV EIGEN3_ROOT + ENV 
EIGEN3_ROOT_DIR + PATHS + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen3 eigen + ) + + if(EIGEN3_INCLUDE_DIR) + _eigen3_check_version() + endif(EIGEN3_INCLUDE_DIR) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) + + mark_as_advanced(EIGEN3_INCLUDE_DIR) + +endif(EIGEN3_INCLUDE_DIR) + diff --git a/thirdparty/dynet/config.h.cmake b/thirdparty/dynet/config.h.cmake new file mode 100644 index 000000000..3b73020b2 --- /dev/null +++ b/thirdparty/dynet/config.h.cmake @@ -0,0 +1,8 @@ +#ifndef DYNET_CONFIG_H_ +#define DYNET_CONFIG_H_ + +#cmakedefine WITH_MINERVA_BACKEND @WITH_MINERVA_BACKEND@ +#cmakedefine WITH_THPP_BACKEND @WITH_THPP_BACKEND@ +#cmakedefine WITH_EIGEN_BACKEND @WITH_EIGEN_BACKEND@ + +#endif diff --git a/thirdparty/dynet/dynet/CMakeLists.txt b/thirdparty/dynet/dynet/CMakeLists.txt new file mode 100644 index 000000000..232f3a88f --- /dev/null +++ b/thirdparty/dynet/dynet/CMakeLists.txt @@ -0,0 +1,161 @@ +# ########## dynet library ########## +# Sources: +set(dynet_library_SRCS + aligned-mem-pool.cc + cfsm-builder.cc + dynet.cc + deep-lstm.cc + devices.cc + dict.cc + dim.cc + exec.cc + expr.cc + fast-lstm.cc + globals.cc + grad-check.cc + graph.cc + gru.cc + hsm-builder.cc + init.cc + lstm.cc + mem.cc + model.cc + mp.cc + nodes.cc + nodes-common.cc + nodes-contract.cc + nodes-conv.cc + nodes-conv2d.cc + param-nodes.cc + pretrain.cc + rnn.cc + rnn-state-machine.cc + saxe-init.cc + shadow-params.cc + tensor.cc + training.cc + treelstm.cc + weight-decay.cc +) + +# Headers: +set(dynet_library_HDRS + aligned-mem-pool.h + cfsm-builder.h + cudnn-ops.h + c2w.h + dynet.h + cuda.h + devices.h + dict.h + dim.h + exec.h + expr.h + fast-lstm.h + functors.h + globals.h + gpu-kernels.h + gpu-ops.h + graph.h + gru.h + hsm-builder.h + init.h + lstm.h + mem.h + model.h + mp.h + nodes.h + nodes-contract.h + nodes-conv.h + op-helper.h + param-nodes.h + 
rnn-state-machine.h + rnn.h + saxe-init.h + shadow-params.h + simd-functors.h + tensor.h + timing.h + training.h + treelstm.h + except.h + nodes-macros.h + weight-decay.h + io-macros.h +) + +file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc) +if (NOT MSVC) + set(BUILD_SHARED_LIBS ON) +endif() + +#foreach(test_src ${TEST_SRCS}) + #Extract the filename without an extension (NAME_WE) +# get_filename_component(testName ${test_src} NAME_WE) + + #Add compile target +# add_executable(${testName} ${test_src}) + + #link to Boost libraries AND your targets and dependencies +# target_link_libraries(${testName} dynet ${LIBS}) + +# set_target_properties(${testName} PROPERTIES +# RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin) + + #Finally add it to test execution - + #Notice the WORKING_DIRECTORY and COMMAND +# add_test(NAME ${testName} +# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin +# COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests.bin/${testName} ) +#endforeach(test_src) + +# Build cpu library +add_library(dynet ${dynet_library_SRCS} ${dynet_library_HDRS}) +target_link_libraries(dynet ${LIBS}) +if(WITH_CUDA_BACKEND) + # cuda flags + set(CUDA_SEPARABLE_COMPILATION ON) + list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-DVERBOSE;-DEIGEN_USE_GPU;-DHAVE_CUDA;") + if(CUDNN_FOUND) + list(APPEND CUDA_NVCC_FLAGS "-DHAVE_CUDNN") + endif() + if(CMAKE_COMPILER_IS_GNUCXX) + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) + # gcc 4.9 or later versions raise SEGV due to the optimization problem. + # Use -O1 instead for now. 
+ list(APPEND CUDA_NVCC_FLAGS "-O1") + else() + list(APPEND CUDA_NVCC_FLAGS "-O2") + endif() + else() + list(APPEND CUDA_NVCC_FLAGS "-O2") + endif() + if(MSVC) + # If MSVC, we need the boost flag because nvcc doesn't properly parse part of the boost template definitions + list(APPEND CUDA_NVCC_FLAGS "-DBOOST_NO_CXX11_ALLOCATOR") + list(APPEND CUDA_NVCC_FLAGS_DEBUG "--compiler-options \"/MDd\"") + list(APPEND CUDA_NVCC_FLAGS_RELEASE "--compiler-options \"/MD\"") + SET(CUDA_PROPAGATE_HOST_FLAGS OFF) + cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu) + else() + SET(CUDA_PROPAGATE_HOST_FLAGS OFF) + cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu OPTIONS --compiler-options "-fPIC") + endif() + set_target_properties(gdynet PROPERTIES + COMPILE_DEFINITIONS HAVE_CUDA) + if(CUDNN_FOUND) + target_compile_definitions(gdynet PRIVATE HAVE_CUDNN) + endif() + cuda_add_cublas_to_target(gdynet) + target_link_libraries(gdynet ${LIBS}) + +endif(WITH_CUDA_BACKEND) + +install(FILES ${dynet_library_HDRS} DESTINATION include/dynet) +install(TARGETS dynet DESTINATION lib) +if(WITH_CUDA_BACKEND) + install(TARGETS gdynet DESTINATION lib) +endif(WITH_CUDA_BACKEND) + +# target_compile_features(dynet PRIVATE cxx_range_for) + diff --git a/thirdparty/dynet/dynet/aligned-mem-pool.cc b/thirdparty/dynet/dynet/aligned-mem-pool.cc new file mode 100644 index 000000000..e29219379 --- /dev/null +++ b/thirdparty/dynet/dynet/aligned-mem-pool.cc @@ -0,0 +1,81 @@ +#include "aligned-mem-pool.h" + +#include + +using namespace dynet; + +void* InternalMemoryPool::allocate(size_t n) { + auto rounded_n = a->round_up_align(n); + if (rounded_n + 
used > capacity) { + return 0; + } + void* res = static_cast(mem) + used; + used += rounded_n; + return res; +} + +void InternalMemoryPool::sys_alloc(size_t cap) { + capacity = a->round_up_align(cap); + mem = a->malloc(capacity); + if (mem == NULL) + DYNET_RUNTIME_ERR(name << " failed to allocate " << capacity); + used = 0; +} + +AlignedMemoryPool::AlignedMemoryPool(const std::string &name, size_t cap, MemAllocator *a) : name(name), current(0), cap(cap), a(a) { + DYNET_ASSERT(cap > 0, "Attempt to allocate memory of size 0 in AlignedMemoryPool"); + pools.push_back(new InternalMemoryPool(name, cap, a)); +} +AlignedMemoryPool::~AlignedMemoryPool() { + for ( auto p : pools) { delete p; } +} + +void* AlignedMemoryPool::allocate(size_t n) { + void *res = pools[current]->allocate(n); + if (res == 0) { + // round up to the nearest multiple of cap + pools.push_back(new InternalMemoryPool(name, ((n+cap-1)/cap)*cap, a)); + current++; + res = pools[current]->allocate(n); + } + return res; +} + +void AlignedMemoryPool::free() { + if (current > 0) { + for (auto p : pools) { delete p; } + pools.clear(); + pools.push_back(new InternalMemoryPool(name, cap * (current+1), a)); + cap = cap * (current + 1); + current = 0; + } + pools[0]->free(); +} + +void AlignedMemoryPool::zero_allocated_memory() { + for (auto p : pools) { p->zero_allocated_memory(); } +} + +size_t AlignedMemoryPool::used() { + if (current == 0) { + return pools[0]->used; + } + size_t res = 0; + for (auto p : pools) { res += p->used; } + return res; +} + +void AlignedMemoryPool::set_used(size_t s) { + DYNET_ARG_CHECK(pools.size() == 1, "Dynet does not support both dynamic increasing of memory pool size, and checkpointing functionality in AlignedMemoryPool. 
If you want to use checkpointing, please pre-allocate enough memory using the --dynet-mem command line option."); + pools[0]->used = s; + // TODO: This is disabled for now, because it would require freeing all the memory pools to do properly + // int c = 0; + // while (s > pools[c]->used) { + // s -= pools[c]->used; + // c++; + // DYNET_ASSERT(c <= current, "attempt to set_used to a larger value than used()."); + // } + // // s <= pools[c]->used + // pools[c]->used = s; + // current = c; +} diff --git a/thirdparty/dynet/dynet/aligned-mem-pool.h b/thirdparty/dynet/dynet/aligned-mem-pool.h new file mode 100644 index 000000000..a4fc2288a --- /dev/null +++ b/thirdparty/dynet/dynet/aligned-mem-pool.h @@ -0,0 +1,71 @@ +#ifndef DYNET_ALIGNED_MEM_POOL_H +#define DYNET_ALIGNED_MEM_POOL_H + +#include +#include "dynet/mem.h" +#include "dynet/globals.h" +#include "dynet/except.h" + +namespace dynet { + +class InternalMemoryPool { + public: + explicit InternalMemoryPool(const std::string & name, size_t cap, MemAllocator* a) : name(name), a(a) { + sys_alloc(cap); + zero_all(); + } + + ~InternalMemoryPool() { + a->free(mem); + } + + void* allocate(size_t n); + + void free() { + //std::cerr << "freeing " << used << " bytes\n"; + used = 0; + } + // zeros out the amount of allocations + void zero_allocated_memory() { + if (used == 0) return; + a->zero(mem, used); + } + + size_t used; + private: + void sys_alloc(size_t cap); + + void zero_all() { + a->zero(mem, capacity); + } + std::string name; + size_t capacity; + MemAllocator* a; + void* mem; +}; + +class AlignedMemoryPool { + public: + explicit AlignedMemoryPool(const std::string &name, size_t cap, MemAllocator *a); + ~AlignedMemoryPool(); + + void* allocate(size_t n); + + void free(); + + void zero_allocated_memory(); + + size_t used(); + void set_used(size_t s); + + private: + std::string name; + std::vector pools; + int current; + size_t cap; + MemAllocator* a; +}; + +} // namespace dynet + +#endif diff --git 
a/thirdparty/dynet/dynet/c2w.h b/thirdparty/dynet/dynet/c2w.h new file mode 100644 index 000000000..8eab5afb7 --- /dev/null +++ b/thirdparty/dynet/dynet/c2w.h @@ -0,0 +1,61 @@ +#ifndef DYNET_C2W_H_ +#define DYNET_C2W_H_ + +#include +#include + +#include "dynet/dynet.h" +#include "dynet/model.h" +#include "dynet/lstm.h" + +namespace dynet { + +// computes a representation of a word by reading characters +// one at a time +struct C2WBuilder { + LSTMBuilder fc2w; + LSTMBuilder rc2w; + LookupParameter p_lookup; + std::vector words; + std::map wordid2vi; + explicit C2WBuilder(int vocab_size, + unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model* m) : + fc2w(layers, input_dim, hidden_dim, m), + rc2w(layers, input_dim, hidden_dim, m), + p_lookup(m->add_lookup_parameters(vocab_size, {input_dim})) { + } + void new_graph(ComputationGraph* cg) { + words.clear(); + fc2w.new_graph(cg); + rc2w.new_graph(cg); + } + // compute a composed representation of a word out of characters + // wordid should be a unique index for each word *type* in the graph being built + VariableIndex add_word(int word_id, const std::vector& chars, ComputationGraph* cg) { + auto it = wordid2vi.find(word_id); + if (it == wordid2vi.end()) { + fc2w.start_new_sequence(cg); + rc2w.start_new_sequence(cg); + std::vector ins(chars.size()); + std::map c2i; + for (unsigned i = 0; i < ins.size(); ++i) { + VariableIndex& v = c2i[chars[i]]; + if (!v) v = cg->add_lookup(p_lookup, chars[i]); + ins[i] = v; + fc2w.add_input(v, cg); + } + for (int i = ins.size() - 1; i >= 0; --i) + rc2w.add_input(ins[i], cg); + VariableIndex i_concat = cg->add_function({fc2w.back(), rc2w.back()}); + it = wordid2vi.insert(std::make_pair(word_id, i_concat)).first; + } + return it->second; + } +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/cfsm-builder.cc b/thirdparty/dynet/dynet/cfsm-builder.cc new file mode 100644 index 000000000..678f013f3 --- /dev/null +++ 
b/thirdparty/dynet/dynet/cfsm-builder.cc @@ -0,0 +1,234 @@ +#include "dynet/cfsm-builder.h" +#include "dynet/except.h" + +#include +#include + +#include + +using namespace std; + +namespace dynet { + +using namespace expr; + +inline bool is_ws(char x) { return (x == ' ' || x == '\t'); } +inline bool not_ws(char x) { return (x != ' ' && x != '\t'); } + +SoftmaxBuilder::~SoftmaxBuilder() {} + +StandardSoftmaxBuilder::StandardSoftmaxBuilder() {} + +StandardSoftmaxBuilder::StandardSoftmaxBuilder(unsigned rep_dim, unsigned vocab_size, Model& model) { + p_w = model.add_parameters({vocab_size, rep_dim}); + p_b = model.add_parameters({vocab_size}, ParameterInitConst(0.f)); +} + +void StandardSoftmaxBuilder::new_graph(ComputationGraph& cg) { + pcg = &cg; + w = parameter(cg, p_w); + b = parameter(cg, p_b); +} + +Expression StandardSoftmaxBuilder::neg_log_softmax(const Expression& rep, unsigned wordidx) { + return pickneglogsoftmax(affine_transform({b, w, rep}), wordidx); +} + +unsigned StandardSoftmaxBuilder::sample(const Expression& rep) { + Expression dist_expr = softmax(affine_transform({b, w, rep})); + vector dist = as_vector(pcg->incremental_forward(dist_expr)); + unsigned c = 0; + double p = rand01(); + for (; c < dist.size(); ++c) { + p -= dist[c]; + if (p < 0.0) { break; } + } + if (c == dist.size()) { + --c; + } + return c; +} + +Expression StandardSoftmaxBuilder::full_log_distribution(const Expression& rep) { + return log(softmax(affine_transform({b, w, rep}))); +} + +DYNET_SERIALIZE_COMMIT(StandardSoftmaxBuilder, DYNET_SERIALIZE_DERIVED_DEFINE(SoftmaxBuilder, p_w, p_b)) +DYNET_SERIALIZE_IMPL(StandardSoftmaxBuilder) + +ClassFactoredSoftmaxBuilder::ClassFactoredSoftmaxBuilder() {} + +ClassFactoredSoftmaxBuilder::ClassFactoredSoftmaxBuilder(unsigned rep_dim, + const std::string& cluster_file, + Dict& word_dict, + Model& model) { + read_cluster_file(cluster_file, word_dict); + const unsigned num_clusters = cdict.size(); + p_r2c = model.add_parameters({num_clusters, 
rep_dim}); + p_cbias = model.add_parameters({num_clusters}, ParameterInitConst(0.f)); + p_rc2ws.resize(num_clusters); + p_rcwbiases.resize(num_clusters); + for (unsigned i = 0; i < num_clusters; ++i) { + auto& words = cidx2words[i]; // vector of word ids + const unsigned num_words_in_cluster = words.size(); + if (num_words_in_cluster > 1) { + // for singleton clusters, we don't need these parameters, so + // we don't create them + p_rc2ws[i] = model.add_parameters({num_words_in_cluster, rep_dim}); + p_rcwbiases[i] = model.add_parameters({num_words_in_cluster}, ParameterInitConst(0.f)); + } + } +} + +void ClassFactoredSoftmaxBuilder::new_graph(ComputationGraph& cg) { + pcg = &cg; + const unsigned num_clusters = cdict.size(); + r2c = parameter(cg, p_r2c); + cbias = parameter(cg, p_cbias); + rc2ws.clear(); + rc2biases.clear(); + rc2ws.resize(num_clusters); + rc2biases.resize(num_clusters); +} + +Expression ClassFactoredSoftmaxBuilder::neg_log_softmax(const Expression& rep, unsigned wordidx) { + // TODO check that new_graph has been called + int clusteridx = widx2cidx[wordidx]; + DYNET_ARG_CHECK(clusteridx >= 0, + "Word ID " << wordidx << " missing from clusters in ClassFactoredSoftmaxBuilder::neg_log_softmax"); + Expression cscores = affine_transform({cbias, r2c, rep}); + Expression cnlp = pickneglogsoftmax(cscores, clusteridx); + if (singleton_cluster[clusteridx]) return cnlp; + // if there is only one word in the cluster, just return -log p(class | rep) + // otherwise predict word too + unsigned wordrow = widx2cwidx[wordidx]; + Expression& cwbias = get_rc2wbias(clusteridx); + Expression& r2cw = get_rc2w(clusteridx); + Expression wscores = affine_transform({cwbias, r2cw, rep}); + Expression wnlp = pickneglogsoftmax(wscores, wordrow); + return cnlp + wnlp; +} + +unsigned ClassFactoredSoftmaxBuilder::sample(const Expression& rep) { + // TODO check that new_graph has been called + Expression cscores = affine_transform({cbias, r2c, rep}); + Expression cdist_expr = 
softmax(cscores); + auto cdist = as_vector(pcg->incremental_forward(cdist_expr)); + unsigned c = 0; + double p = rand01(); + for (; c < cdist.size(); ++c) { + p -= cdist[c]; + if (p < 0.0) { break; } + } + if (c == cdist.size()) --c; + unsigned w = 0; + if (!singleton_cluster[c]) { + Expression& cwbias = get_rc2wbias(c); + Expression& r2cw = get_rc2w(c); + Expression wscores = affine_transform({cwbias, r2cw, rep}); + Expression wdist_expr = softmax(wscores); + auto wdist = as_vector(pcg->incremental_forward(wdist_expr)); + p = rand01(); + for (; w < wdist.size(); ++w) { + p -= wdist[w]; + if (p < 0.0) { break; } + } + if (w == wdist.size()) --w; + } + return cidx2words[c][w]; +} + +Expression ClassFactoredSoftmaxBuilder::full_log_distribution(const Expression& rep) { + vector full_dist(widx2cidx.size()); + Expression cscores = log(softmax(affine_transform({cbias, r2c, rep}))); + + for (unsigned i = 0; i < widx2cidx.size(); ++i) { + if (widx2cidx[i] == -1) { + // XXX: Should be -inf + full_dist[i] = input(*pcg, -10000); + } + } + + for (unsigned c = 0; c < p_rc2ws.size(); ++c) { + Expression cscore = pick(cscores, c); + if (singleton_cluster[c]) { + for (unsigned i = 0; i < cidx2words[c].size(); ++i) { + unsigned w = cidx2words[c][i]; + full_dist[w] = cscore; + } + } + else { + Expression& cwbias = get_rc2wbias(c); + Expression& r2cw = get_rc2w(c); + Expression wscores = affine_transform({cwbias, r2cw, rep}); + Expression wdist = softmax(wscores); + + for (unsigned i = 0; i < cidx2words[c].size(); ++i) { + unsigned w = cidx2words[c][i]; + full_dist[w] = pick(wdist, i) + cscore; + } + } + } + + return log(softmax(concatenate(full_dist))); +} + +void ClassFactoredSoftmaxBuilder::read_cluster_file(const std::string& cluster_file, Dict& word_dict) { + cerr << "Reading clusters from " << cluster_file << " ...\n"; + ifstream in(cluster_file); + if(!in) + DYNET_INVALID_ARG("Could not find cluster file " << cluster_file << " in ClassFactoredSoftmax"); + int wc = 0; + string 
line; + while(getline(in, line)) { + ++wc; + const unsigned len = line.size(); + unsigned startc = 0; + while (is_ws(line[startc]) && startc < len) { ++startc; } + unsigned endc = startc; + while (not_ws(line[endc]) && endc < len) { ++endc; } + unsigned startw = endc; + while (is_ws(line[startw]) && startw < len) { ++startw; } + unsigned endw = startw; + while (not_ws(line[endw]) && endw < len) { ++endw; } + if(endc <= startc || startw <= endc || endw <= startw) + DYNET_INVALID_ARG("Invalid format in cluster file " << cluster_file << " in ClassFactoredSoftmax"); + unsigned c = cdict.convert(line.substr(startc, endc - startc)); + unsigned word = word_dict.convert(line.substr(startw, endw - startw)); + if (word >= widx2cidx.size()) { + widx2cidx.resize(word + 1, -1); + widx2cwidx.resize(word + 1); + } + widx2cidx[word] = c; + if (c >= cidx2words.size()) cidx2words.resize(c + 1); + auto& clusterwords = cidx2words[c]; + widx2cwidx[word] = clusterwords.size(); + clusterwords.push_back(word); + } + singleton_cluster.resize(cidx2words.size()); + int scs = 0; + for (unsigned i = 0; i < cidx2words.size(); ++i) { + bool sc = cidx2words[i].size() <= 1; + if (sc) scs++; + singleton_cluster[i] = sc; + } + cerr << "Read " << wc << " words in " << cdict.size() << " clusters (" << scs << " singleton clusters)\n"; +} + +DYNET_SERIALIZE_COMMIT(ClassFactoredSoftmaxBuilder, + DYNET_SERIALIZE_DERIVED_DEFINE(SoftmaxBuilder, cdict, widx2cidx, widx2cwidx, cidx2words, singleton_cluster, p_r2c, p_cbias, p_rc2ws, p_rcwbiases)) + +void ClassFactoredSoftmaxBuilder::initialize_expressions() { + for (unsigned c = 0; c < p_rc2ws.size(); ++c) { + //get_rc2w(_bias) creates the expression at c if the expression does not already exist. 
+ get_rc2w(c); + get_rc2wbias(c); + } +} + +DYNET_SERIALIZE_IMPL(ClassFactoredSoftmaxBuilder) + +} // namespace dynet + +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::StandardSoftmaxBuilder) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::ClassFactoredSoftmaxBuilder) diff --git a/thirdparty/dynet/dynet/cfsm-builder.h b/thirdparty/dynet/dynet/cfsm-builder.h new file mode 100644 index 000000000..1f7fc2ec0 --- /dev/null +++ b/thirdparty/dynet/dynet/cfsm-builder.h @@ -0,0 +1,110 @@ +#ifndef DYNET_CFSMBUILDER_H +#define DYNET_CFSMBUILDER_H + +#include +#include + +#include "dynet/dynet.h" +#include "dynet/expr.h" +#include "dynet/dict.h" +#include "dynet/io-macros.h" + +namespace dynet { + +class SoftmaxBuilder { +public: + virtual ~SoftmaxBuilder(); + + // call this once per ComputationGraph + virtual void new_graph(ComputationGraph& cg) = 0; + + // -log(p(w | rep)) + virtual expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx) = 0; + + // samples a word from p(w | rep) + virtual unsigned sample(const expr::Expression& rep) = 0; + + // returns an Expression representing a vector the size of the vocabulary. + // The ith dimension gives log p(w_i | rep). This function may be SLOW. Avoid if possible. 
+ virtual expr::Expression full_log_distribution(const expr::Expression& rep) = 0; + + DYNET_SERIALIZE_COMMIT_EMPTY() +}; + +class StandardSoftmaxBuilder : public SoftmaxBuilder { +public: + StandardSoftmaxBuilder(unsigned rep_dim, unsigned vocab_size, Model& model); + void new_graph(ComputationGraph& cg); + expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); + unsigned sample(const expr::Expression& rep); + expr::Expression full_log_distribution(const expr::Expression& rep); + +private: + StandardSoftmaxBuilder(); + Parameter p_w; + Parameter p_b; + expr::Expression w; + expr::Expression b; + ComputationGraph* pcg; + + DYNET_SERIALIZE_DECLARE() +}; + +// helps with implementation of hierarchical softmax +// read a file with lines of the following format +// CLASSID word [freq] +class ClassFactoredSoftmaxBuilder : public SoftmaxBuilder { + public: + ClassFactoredSoftmaxBuilder(unsigned rep_dim, + const std::string& cluster_file, + Dict& word_dict, + Model& model); + + void new_graph(ComputationGraph& cg); + expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); + unsigned sample(const expr::Expression& rep); + expr::Expression full_log_distribution(const expr::Expression& rep); + void initialize_expressions(); + + private: + ClassFactoredSoftmaxBuilder(); + void read_cluster_file(const std::string& cluster_file, Dict& word_dict); + + Dict cdict; + std::vector widx2cidx; // will be -1 if not present + std::vector widx2cwidx; // word index to word index inside of cluster + std::vector> cidx2words; + std::vector singleton_cluster; // does cluster contain a single word type? 
+ + // parameters + Parameter p_r2c; + Parameter p_cbias; + std::vector p_rc2ws; // len = number of classes + std::vector p_rcwbiases; // len = number of classes + + // Expressions for current graph + inline expr::Expression& get_rc2w(unsigned cluster_idx) { + expr::Expression& e = rc2ws[cluster_idx]; + if (!e.pg) + e = expr::parameter(*pcg, p_rc2ws[cluster_idx]); + return e; + } + inline expr::Expression& get_rc2wbias(unsigned cluster_idx) { + expr::Expression& e = rc2biases[cluster_idx]; + if (!e.pg) + e = expr::parameter(*pcg, p_rcwbiases[cluster_idx]); + return e; + } + ComputationGraph* pcg; + expr::Expression r2c; + expr::Expression cbias; + std::vector rc2ws; + std::vector rc2biases; + DYNET_SERIALIZE_DECLARE() +}; +} // namespace dynet + +BOOST_CLASS_EXPORT_KEY(dynet::StandardSoftmaxBuilder) +BOOST_CLASS_EXPORT_KEY(dynet::ClassFactoredSoftmaxBuilder) + +#endif diff --git a/thirdparty/dynet/dynet/cuda.cc b/thirdparty/dynet/dynet/cuda.cc new file mode 100644 index 000000000..fc504c59f --- /dev/null +++ b/thirdparty/dynet/dynet/cuda.cc @@ -0,0 +1,95 @@ +#include +#include +#include + +#include "dynet/dynet.h" +#include "dynet/cuda.h" +#include "dynet/init.h" + +using namespace std; + +namespace dynet { + +vector initialize_gpu(DynetParams& params) { + // Get GPU devices count + int nDevices; + CUDA_CHECK(cudaGetDeviceCount(&nDevices)); + if (nDevices < 1) + throw std::runtime_error("No GPUs found but DyNet compiled with CUDA support. 
Recompile without -DBACKEND=cuda"); + + // Check gpu_mask + for (unsigned gpu_id = nDevices; gpu_id < MAX_GPUS; ++gpu_id) { + if (params.gpu_mask[gpu_id] != 0) { + ostringstream oss; oss << "You requested GPU id " << gpu_id << " but system only reports up to " << nDevices; + throw std::invalid_argument(oss.str()); + } + } + + if (params.ngpus_requested || params.requested_gpus == -1) { + if (params.requested_gpus == -1) params.requested_gpus = 1; + cerr << "Request for " << params.requested_gpus << " GPU" << (params.requested_gpus == 1 ? "" : "s") << " ...\n"; + for (int i = 0; i < MAX_GPUS; ++i) params.gpu_mask[i] = 1; + } else if (params.ids_requested) { + params.requested_gpus++; + cerr << "[dynet] Request for " << params.requested_gpus << " specific GPU" << (params.requested_gpus == 1 ? "" : "s") << " ...\n"; + } + + vector gpudevices; + if (params.requested_gpus == 0) return gpudevices; + if (params.requested_gpus > nDevices) { + ostringstream oss; oss << "You requested " << params.requested_gpus << " GPUs but system only reports " << nDevices; + throw std::invalid_argument(oss.str()); + } + + // after all that, params.requested_gpus is the number of GPUs to reserve + // we now pick the ones that are both requested by the user or have + // the most memory free + + vector gpu_free_mem(MAX_GPUS, 0); + vector gpus(MAX_GPUS, 0); + for (int i = 0; i < MAX_GPUS; ++i) gpus[i] = i; + size_t free_bytes, total_bytes; + for (int i = 0; i < nDevices; i++) { + if (!params.gpu_mask[i]) continue; + cudaDeviceProp prop; + CUDA_CHECK(cudaGetDeviceProperties(&prop, i)); + cerr << "[dynet] Device Number: " << i << endl; + cerr << "[dynet] Device name: " << prop.name << endl; + cerr << "[dynet] Memory Clock Rate (KHz): " << prop.memoryClockRate << endl; + cerr << "[dynet] Memory Bus Width (bits): " << prop.memoryBusWidth << endl; + cerr << "[dynet] Peak Memory Bandwidth (GB/s): " << (2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6) << endl; + if 
(!prop.unifiedAddressing) + throw std::invalid_argument("[dynet] GPU does not support unified addressing."); + CUDA_CHECK(cudaSetDevice(i)); + try { + CUDA_CHECK(cudaMemGetInfo( &free_bytes, &total_bytes )); + cerr << "[dynet] Memory Free (GB): " << free_bytes / 1.0e9 << "/" << total_bytes / 1.0e9 << endl; + cerr << "[dynet]" << endl; + gpu_free_mem[i] = free_bytes; + } catch (dynet::cuda_exception e) { + cerr << "[dynet] FAILED to get free memory" << endl; + gpu_free_mem[i] = 0; + cudaGetLastError(); + } + CUDA_CHECK(cudaDeviceReset()); + } + stable_sort(gpus.begin(), gpus.end(), [&](int a, int b) -> bool { return gpu_free_mem[a] > gpu_free_mem[b]; }); + gpus.resize(params.requested_gpus); + cerr << "[dynet] Device(s) selected:"; + for (int i = 0; i < params.requested_gpus; ++i) { + cerr << ' ' << gpus[i]; + Device* d = new Device_GPU(gpudevices.size(), params.mem_descriptor, gpus[i]); + gpudevices.push_back(d); + } + cerr << endl; + + return gpudevices; + +} + +vector initialize_gpu(int& argc, char**& argv) { + DynetParams params = extract_dynet_params(argc, argv); + return initialize_gpu(params); +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/cuda.h b/thirdparty/dynet/dynet/cuda.h new file mode 100644 index 000000000..13b0d8309 --- /dev/null +++ b/thirdparty/dynet/dynet/cuda.h @@ -0,0 +1,84 @@ +#ifndef DYNET_CUDA_H +#define DYNET_CUDA_H +#if HAVE_CUDA + +#include +#include +#include +#include +#include +#include +#if HAVE_CUDNN +#include +#endif +#include "dynet/except.h" + +#define MAX_GPUS 256 + +#define CUDA_CHECK(stmt) do { \ + cudaError_t err = stmt; \ + if (err != cudaSuccess) { \ + std::cerr << "CUDA failure in " << #stmt << std::endl\ + << cudaGetErrorString(err) << std::endl; \ + throw dynet::cuda_exception(#stmt); \ + } \ + } while(0) + +#define CUBLAS_CHECK(stmt) do { \ + cublasStatus_t stat = stmt; \ + if (stat != CUBLAS_STATUS_SUCCESS) { \ + std::cerr << "CUBLAS failure in " << #stmt \ + << std::endl << stat << std::endl; \ + throw 
dynet::cuda_exception(#stmt); \ + } \ + } while(0) + +#if HAVE_CUDNN +#define CUDNN_CHECK(stmt) do { \ + cudnnStatus_t stat = (stmt); \ + if (stat != CUDNN_STATUS_SUCCESS){ \ + std::cerr << "CUDNN failure in " << #stmt \ + << std::endl << cudnnGetErrorString(stat) \ + << std::endl; \ + throw dynet::cuda_exception(#stmt); \ + } \ + } while(0) +#endif + +namespace dynet { + +struct DynetParams; + + +class Device; + +inline std::pair SizeToBlockThreadPair(int n) { + DYNET_ASSERT(n > 0, "Bad thread size in GPU code " << n); + int logn; +#if defined(_MSC_VER) + logn = 0; + if (n > 2) { + int localN = n - 1; + while (localN >>= 1) + logn++; + } +#else + asm("\tbsr %1, %0\n" + : "=r"(logn) + : "r" (n-1)); +#endif + logn = logn > 9 ? 9 : (logn < 4 ? 4 : logn); + ++logn; + int threads = 1 << logn; + int blocks = (n + threads - 1) >> logn; + blocks = blocks > 65535 ? 65535 : blocks; + return std::make_pair(blocks, threads); +} + +std::vector initialize_gpu(dynet::DynetParams& params); +std::vector initialize_gpu(int& argc, char**& argv); + +} // namespace dynet + +#endif +#endif diff --git a/thirdparty/dynet/dynet/cudnn-ops.cu b/thirdparty/dynet/dynet/cudnn-ops.cu new file mode 100644 index 000000000..cbaea8c64 --- /dev/null +++ b/thirdparty/dynet/dynet/cudnn-ops.cu @@ -0,0 +1,224 @@ +#if HAVE_CUDNN +#include +#include +#include + +#include "dynet/dynet.h" +#include "dynet/cudnn-ops.h" + +namespace dynet { + +CudnnConvOp::CudnnConvOp(const std::vector& s, const bool padding_type) { + stride.resize(s.size()); + for (unsigned i = 0; i < stride.size(); ++i) { + stride[i] = static_cast(s[i]); + } + is_valid = padding_type; + + fwd_workspace = NULL; + bwd_filter_workspace = NULL; + bwd_data_workspace = NULL; + workspace_fwd_size_ = 0; + workspace_bwd_data_size_ = 0; + workspace_bwd_filter_size_ = 0; + mempool_ = NULL; + + CUDNN_CHECK(cudnnCreateTensorDescriptor(&x_desc_)); + CUDNN_CHECK(cudnnCreateTensorDescriptor(&y_desc_)); + 
CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_)); + CUDNN_CHECK(cudnnCreateFilterDescriptor(&filter_desc_)); + CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc_)); +} + +CudnnConvOp::~CudnnConvOp() { + CUDNN_CHECK(cudnnDestroyTensorDescriptor(x_desc_)); + CUDNN_CHECK(cudnnDestroyTensorDescriptor(y_desc_)); + CUDNN_CHECK(cudnnDestroyTensorDescriptor(bias_desc_)); + CUDNN_CHECK(cudnnDestroyFilterDescriptor(filter_desc_)); + CUDNN_CHECK(cudnnDestroyConvolutionDescriptor(conv_desc_)); +} + +void CudnnConvOp::forward_impl(const Device_GPU & dev, const std::vector& xs, Tensor& fx) { + const Tensor* x = xs[0]; + const Tensor* filter = xs[1]; + Tensor* y = &fx; + + unsigned XN = x->d.bd; + unsigned XC = x->d[2]; + unsigned XH = x->d[0]; + unsigned XW = x->d[1]; + unsigned FYC = filter->d[3]; + unsigned FXC = filter->d[2]; + unsigned FH = filter->d[0]; + unsigned FW = filter->d[1]; + unsigned YN = fx.d.bd; + unsigned YC = fx.d[2]; + unsigned YH = fx.d[0]; + unsigned YW = fx.d[1]; + + // infer pad_h, pad_w + if (!is_valid) { + // Total padding on rows and cols is + // Pr = (R' - 1) * S + Kr - R + // Pc = (C' - 1) * S + Kc - C + // where (R', C') are output dimensions, (R, C) are input dimensions, S + // is stride, (Kr, Kc) are filter dimensions. + // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top + // and Pc - Pc/2 on the bottom. When Pr or Pc is odd, this means + // we pad more on the right and bottom than on the top and left. 
+ pad_h = std::max(0, (YH - 1) * stride[0] + FH - XH); + pad_w = std::max(0, (YW - 1) * stride[1] + FW - XW); + if (mempool_ == NULL) { + throw std::runtime_error("dynet::CudnnConvOp::mempool_ not set"); + } + const bool h_odd = (pad_h % 2 != 0); + const bool w_odd = (pad_w % 2 != 0); + if (h_odd || w_odd) { // then we need to pad one row/col on the bottom/right + unsigned new_XH = XH + h_odd; + unsigned new_XW = XW + w_odd; + void* temp = mempool_->allocate(sizeof(float) * new_XW * new_XH * XC * XN); + padded_x = Tensor(Dim({ new_XH, new_XW, XC }, XN), static_cast(temp), xs[0]->device, DeviceMempool::FXS); + Eigen::array, 4> paddings; + paddings[0] = std::make_pair(0, static_cast(h_odd)); + paddings[1] = std::make_pair(0, static_cast(w_odd)); + paddings[2] = std::make_pair(0, 0); + paddings[3] = std::make_pair(0, 0); + padded_x.tb<3>().device(*dev.edevice) = xs[0]->tb<3>().pad(paddings); + XH = new_XH; + XW = new_XW; + x = &padded_x; + } + } + + if (xs.size() == 3) { + CUDNN_CHECK(cudnnSetTensor4dDescriptor(bias_desc_, + CUDNN_TENSOR_NCHW, DataTypeToCudnnType::value, + 1, FYC, 1, 1)); + } + CUDNN_CHECK(cudnnSetTensor4dDescriptor(x_desc_, + CUDNN_TENSOR_NCHW, DataTypeToCudnnType::value, + XN, XC, XW, XH)); + CUDNN_CHECK(cudnnSetTensor4dDescriptor(y_desc_, + CUDNN_TENSOR_NCHW, DataTypeToCudnnType::value, + YN, YC, YW, YH)); + CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc_, + DataTypeToCudnnType::value, CUDNN_TENSOR_NCHW, + FYC, FXC, FW, FH)); + CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc_, + pad_w/2, pad_h/2, stride[1], stride[0], 1, 1, + CUDNN_CROSS_CORRELATION)); + + //TODO(Hao Zhang): there should be an autotune function to determine + // the best convolution algorithm to use. + // However, as DyNet changes CG for every sample (or every iteration), + // This autotune function seems to be unnecessary. 
+ CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(dev.cudnnHandle, + x_desc_, filter_desc_, conv_desc_, y_desc_, + CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, workspace_size_limit_bytes, + &fwd_algo_)); + CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(dev.cudnnHandle, + x_desc_, filter_desc_, conv_desc_, y_desc_, + fwd_algo_, &workspace_fwd_size_)); + if (fwd_workspace == NULL) { + fwd_workspace = mempool_->allocate(workspace_fwd_size_); + } + float alpha = 1.f, beta = 0.f; + CUDNN_CHECK(cudnnConvolutionForward(dev.cudnnHandle, + &alpha, x_desc_, x->v, filter_desc_, filter->v, + conv_desc_, fwd_algo_, fwd_workspace, workspace_fwd_size_, + &beta, y_desc_, y->v)); + if (xs.size() == 3) { + CUDNN_CHECK(cudnnAddTensor(dev.cudnnHandle, &alpha, + bias_desc_, xs[2]->v, &alpha, y_desc_, y->v)); + } +} + +void CudnnConvOp::backward_impl(const Device_GPU & dev, + const std::vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) { + const Tensor* x = xs[0]; + const Tensor* filter = xs[1]; + const Tensor* dy = &dEdf; + Tensor* dxi = &dEdxi; + unsigned XN = x->d.bd; + unsigned XC = x->d[2]; + unsigned XH = x->d[0]; + unsigned XW = x->d[1]; + const bool h_odd = (pad_h % 2 != 0); + const bool w_odd = (pad_w % 2 != 0); + void* dx_ptr = NULL; + if (mempool_ == NULL) + throw std::runtime_error("dynet::CudnnConvOp::mempool_ not set"); + if (h_odd || w_odd) { + unsigned new_XH = XH + h_odd; + unsigned new_XW = XW + w_odd; + DYNET_ASSERT(padded_x.d[0] == new_XH, "Tensor input_padded must have been padded"); + DYNET_ASSERT(padded_x.d[1] == new_XW, "Tensor input_padded must have been padded"); + x = &padded_x; + XH = new_XH; + XW = new_XW; + if (i == 0) + dx_ptr = mempool_->allocate(sizeof(float) * new_XW * new_XH * XC * XN); + } + // here we could reuse the descriptor we created for forward, because + // they share the same size + float alpha = 1.f, beta = 0.f; + if (i == 1) { + CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(dev.cudnnHandle, + 
x_desc_, y_desc_, conv_desc_, filter_desc_, + CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST, + workspace_size_limit_bytes, &bwd_f_algo_)); + CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(dev.cudnnHandle, + x_desc_, y_desc_, conv_desc_, filter_desc_, + bwd_f_algo_, &workspace_bwd_filter_size_)); + // allocate space for backward compute + if (bwd_filter_workspace == NULL) { + bwd_filter_workspace = mempool_->allocate(sizeof(float) * workspace_bwd_filter_size_); + } + CUDNN_CHECK(cudnnConvolutionBackwardFilter(dev.cudnnHandle, + &alpha, x_desc_, x->v, + y_desc_, dy->v, + conv_desc_, bwd_f_algo_, bwd_filter_workspace, workspace_bwd_filter_size_, + &beta, filter_desc_, dxi->v)); + } else if (i == 0) { + CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(dev.cudnnHandle, + filter_desc_, y_desc_, conv_desc_, x_desc_, + CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST, + workspace_size_limit_bytes, &bwd_d_algo_)); + CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(dev.cudnnHandle, + filter_desc_, y_desc_, conv_desc_, x_desc_, + bwd_d_algo_, &workspace_bwd_data_size_)); + if (bwd_data_workspace == NULL) { + bwd_data_workspace = mempool_->allocate(sizeof(float) * workspace_bwd_data_size_); + } + if (h_odd || w_odd) { + CUDNN_CHECK(cudnnConvolutionBackwardData(dev.cudnnHandle, + &alpha, filter_desc_, filter->v, + y_desc_, dy->v, + conv_desc_, bwd_d_algo_, bwd_data_workspace, workspace_bwd_data_size_, + &beta, x_desc_, dx_ptr)); + Tensor padded_dx = Tensor(Dim({XH, XW, XC}, XN), static_cast(dx_ptr), xs[0]->device, DeviceMempool::FXS); + + Eigen::array offsets = {0, 0, 0, 0}; + Eigen::array extents = {static_cast(XH), static_cast(XW), static_cast(XC), static_cast(XN)}; + dxi->tb<3>().device(*dev.edevice) = padded_dx.tb<3>().slice(offsets, extents); + } else { + CUDNN_CHECK(cudnnConvolutionBackwardData(dev.cudnnHandle, + &alpha, filter_desc_, filter->v, + y_desc_, dy->v, + conv_desc_, bwd_d_algo_, bwd_data_workspace, workspace_bwd_data_size_, + &beta, x_desc_, 
dxi->v)); + } + } else { + CUDNN_CHECK(cudnnConvolutionBackwardBias(dev.cudnnHandle, + &alpha, y_desc_, dy->v, + &beta, bias_desc_, dxi->v)); + } +} + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/cudnn-ops.h b/thirdparty/dynet/dynet/cudnn-ops.h new file mode 100644 index 000000000..5ea64431d --- /dev/null +++ b/thirdparty/dynet/dynet/cudnn-ops.h @@ -0,0 +1,66 @@ +#ifndef DYNET_CUDNN_OPS_H +#define DYNET_CUDNN_OPS_H + +#if HAVE_CUDNN +#include "dynet/dynet.h" +#include "dynet/cuda.h" +#include "dynet/op-helper.h" + +namespace dynet { + +class CudnnConvOp { + public: + explicit CudnnConvOp() {} + explicit CudnnConvOp(const std::vector& s, const bool padding_type); + ~CudnnConvOp(); + /* call this function before using the CudnnConvOp */ + void set_pool(NodeMemPool* mempool) { + mempool_ = mempool; + } + void forward_impl(const Device_GPU & dev, const std::vector& xs, Tensor& fx); + void backward_impl(const Device_GPU & dev, + const std::vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi); + static const size_t workspace_size_limit_bytes = 8 * 1024 * 1024; + + protected: + std::vector stride; + bool is_valid; + + /* cuDNN resource */ + cudnnTensorDescriptor_t x_desc_, y_desc_; + cudnnTensorDescriptor_t bias_desc_; + cudnnFilterDescriptor_t filter_desc_; + cudnnConvolutionDescriptor_t conv_desc_; + cudnnConvolutionFwdAlgo_t fwd_algo_; + cudnnConvolutionBwdFilterAlgo_t bwd_f_algo_; + cudnnConvolutionBwdDataAlgo_t bwd_d_algo_; + + // cudnn workspace + size_t workspace_fwd_size_; + size_t workspace_bwd_data_size_; + size_t workspace_bwd_filter_size_; + void* fwd_workspace; + void* bwd_filter_workspace; + void* bwd_data_workspace; + + private: + int pad_h = 0; + int pad_w = 0; + Tensor padded_x; + Tensor padded_dx; + NodeMemPool* mempool_; +}; + +/* +class CudnnMaxPoolingOp { + +}; +*/ +} // namespace dynet + +#endif +#endif diff --git a/thirdparty/dynet/dynet/deep-lstm.cc b/thirdparty/dynet/dynet/deep-lstm.cc new 
file mode 100644 index 000000000..b7b3f9340 --- /dev/null +++ b/thirdparty/dynet/dynet/deep-lstm.cc @@ -0,0 +1,165 @@ +#include "dynet/deep-lstm.h" + +#include +#include +#include + +#include "dynet/nodes.h" + +using namespace std; +using namespace dynet::expr; + +namespace dynet { + +enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; + +DeepLSTMBuilder::DeepLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) : layers(layers) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // o + Parameter p_x2o = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2o = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_c2o = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bo = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // c + Parameter p_x2c = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2c = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bc = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim + input_dim; // output (hidden) from 1st layer is input to next + + vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; + params.push_back(ps); + } // layers +} + +void DeepLSTMBuilder::new_graph_impl(ComputationGraph& cg){ + param_vars.clear(); + + for (unsigned i = 0; i < layers; ++i){ + auto& p = params[i]; + + //i + Expression i_x2i = parameter(cg,p[X2I]); + Expression i_h2i = parameter(cg,p[H2I]); + Expression i_c2i = parameter(cg,p[C2I]); + Expression i_bi = parameter(cg,p[BI]); + //o + Expression i_x2o = parameter(cg,p[X2O]); + 
Expression i_h2o = parameter(cg,p[H2O]); + Expression i_c2o = parameter(cg,p[C2O]); + Expression i_bo = parameter(cg,p[BO]); + //c + Expression i_x2c = parameter(cg,p[X2C]); + Expression i_h2c = parameter(cg,p[H2C]); + Expression i_bc = parameter(cg,p[BC]); + + vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; + param_vars.push_back(vars); + } +} + +// layout: 0..layers = c +// layers+1..2*layers = h +void DeepLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "DeepLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state and cell for each layer). However, for " << layers << " layers, " + << hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } +} + +Expression DeepLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + o.push_back(Expression()); + vector& ht = h.back(); + vector& ct = c.back(); + Expression& ot = o.back(); + Expression in = x; + vector cc(layers); + for (unsigned i = 0; i < layers; ++i) { + if (i > 0) + in = concatenate({in, x}); + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < 0) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + } else { // t > 0 + i_h_tm1 = h[prev][i]; + i_c_tm1 = c[prev][i]; + } + // input + Expression i_ait; + if (has_prev_state) +// i_ait = vars[BI] + vars[X2I] * in + vars[H2I]*i_h_tm1 + vars[C2I] * i_c_tm1; + i_ait = 
affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], i_c_tm1}); + else +// i_ait = vars[BI] + vars[X2I] * in; + i_ait = affine_transform({vars[BI], vars[X2I], in}); + Expression i_it = logistic(i_ait); + // forget + Expression i_ft = 1.f - i_it; + // write memory cell + Expression i_awt; + if (has_prev_state) +// i_awt = vars[BC] + vars[X2C] * in + vars[H2C]*i_h_tm1; + i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); + else +// i_awt = vars[BC] + vars[X2C] * in; + i_awt = affine_transform({vars[BC], vars[X2C], in}); + Expression i_wt = tanh(i_awt); + // output + if (has_prev_state) { + Expression i_nwt = cmult(i_it,i_wt); + Expression i_crt = cmult(i_ft,i_c_tm1); + ct[i] = i_crt + i_nwt; + } else { + ct[i] = cmult(i_it,i_wt); + } + + Expression i_aot; + if (has_prev_state) +// i_aot = vars[BO] + vars[X2O] * in + vars[H2O] * i_h_tm1 + vars[C2O] * ct[i]; + i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], ct[i]}); + else +// i_aot = vars[BO] + vars[X2O] * in; + i_aot = affine_transform({vars[BO], vars[X2O], in}); + Expression i_ot = logistic(i_aot); + Expression ph_t = tanh(ct[i]); + in = ht[i] = cmult(i_ot,ph_t); + cc[i] = in; + } + ot = concatenate(cc); + return ot; +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/deep-lstm.h b/thirdparty/dynet/dynet/deep-lstm.h new file mode 100644 index 000000000..3a0e6f3bb --- /dev/null +++ b/thirdparty/dynet/dynet/deep-lstm.h @@ -0,0 +1,54 @@ +#ifndef DYNET_DEEP_LSTM_H_ +#define DYNET_DEEP_LSTM_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace dynet::expr; + +namespace dynet { + +class Model; + +struct DeepLSTMBuilder : public RNNBuilder { + DeepLSTMBuilder() = default; + explicit DeepLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model); + + Expression back() const override { return h.back().back(); } + std::vector final_h() const override { return 
(h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for(auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + + public: + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is time, second is layer + std::vector> h, c; + std::vector o; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/devices.cc b/thirdparty/dynet/dynet/devices.cc new file mode 100644 index 000000000..22672f893 --- /dev/null +++ b/thirdparty/dynet/dynet/devices.cc @@ -0,0 +1,126 @@ +#include "dynet/devices.h" + +#include +#include +#include + +#include "dynet/cuda.h" +#include "dynet/dynet.h" +#include "dynet/expr.h" +#include "dynet/except.h" + +using namespace std; + +namespace dynet { + +DeviceMempoolSizes::DeviceMempoolSizes(size_t total_size) { + used[0] = total_size / 3; + used[1] = total_size / 3; + used[2] = total_size / 3; +} + +DeviceMempoolSizes::DeviceMempoolSizes(size_t fx_s, size_t dEdfs_s, size_t ps_s) { + used[0] = fx_s; + used[1] = dEdfs_s; + used[2] = ps_s; +} + +DeviceMempoolSizes::DeviceMempoolSizes(const std::string & descriptor) { + vector strs; + boost::algorithm::split(strs, descriptor, boost::is_any_of(",")); + if (strs.size() == 1) { + size_t total_size = stoi(strs[0]); + used[0] = total_size / 3; + used[1] = total_size / 3; + used[2] = total_size / 3; + } else if (strs.size() == 3) { + used[0] = stoi(strs[0]); + used[1] = stoi(strs[1]); + used[2] = stoi(strs[2]); + } 
else { + DYNET_INVALID_ARG("the format of --dynet-mem is invalid: " << descriptor); + } +} + +Device::~Device() {} + +DeviceMempoolSizes Device::mark(ComputationGraph *cg) { + cg->incremental_forward({cg, (VariableIndex)(cg->nodes.size() - 1)}); // needed so that we actually allocate the needed memory + // for all existing nodes. + return DeviceMempoolSizes(pools[0]->used(), pools[1]->used(), pools[2]->used()); +} + +void Device::revert(const DeviceMempoolSizes & cp) { + if(cp.used[0] > pools[0]->used()) + DYNET_INVALID_ARG("Saved value greater than original value in Device::revert (" << cp.used[0] << " > " << pools[0]->used() << ")"); + pools[0]->set_used(cp.used[0]); + if(cp.used[1] > pools[1]->used()) + DYNET_INVALID_ARG("Saved value greater than original value in Device::revert (" << cp.used[1] << " > " << pools[1]->used() << ")"); + pools[1]->set_used(cp.used[1]); + if(cp.used[2] > pools[2]->used()) + DYNET_INVALID_ARG("Saved value greater than original value in Device::revert (" << cp.used[2] << " > " << pools[2]->used() << ")"); + pools[2]->set_used(cp.used[2]); +} + +void Device::allocate_tensor(DeviceMempool mp, Tensor & tens) { + DYNET_ASSERT(mp != DeviceMempool::NONE, "Attempt to allocate tensor for NONE DeviceMempool"); + DYNET_ASSERT(pools[(int)mp] != nullptr, "Attempt to allocate tensor for null DeviceMempool"); + tens.v = (float*)pools[(int)mp]->allocate(tens.d.size() * sizeof(float)); + DYNET_ASSERT(tens.v != nullptr, "Allocated tensor is zero"); + tens.mem_pool = mp; +} + +#if HAVE_CUDA +Device_GPU::Device_GPU(int my_id, const DeviceMempoolSizes & mbs, int device_id) : + Device(my_id, DeviceType::GPU, &gpu_mem), cuda_device_id(device_id), gpu_mem(device_id) { + CUDA_CHECK(cudaSetDevice(device_id)); + CUBLAS_CHECK(cublasCreate(&cublas_handle)); + CUBLAS_CHECK(cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE)); +#if HAVE_CUDNN + CUDNN_CHECK(cudnnCreate(&cudnnHandle)); +#endif + kSCALAR_MINUSONE = (float*)gpu_mem.malloc(sizeof(float)); + 
kSCALAR_ONE = (float*)gpu_mem.malloc(sizeof(float)); + kSCALAR_ZERO = (float*)gpu_mem.malloc(sizeof(float)); + float minusone = -1; + CUDA_CHECK(cudaMemcpyAsync(kSCALAR_MINUSONE, &minusone, sizeof(float), cudaMemcpyHostToDevice)); + float one = 1; + CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ONE, &one, sizeof(float), cudaMemcpyHostToDevice)); + float zero = 0; + CUDA_CHECK(cudaMemcpyAsync(kSCALAR_ZERO, &zero, sizeof(float), cudaMemcpyHostToDevice)); + + // Initialize the Eigen device + estream = new Eigen::CudaStreamDevice(device_id); + edevice = new Eigen::GpuDevice(estream); + + // this is the big memory allocation. + pools[0] = new AlignedMemoryPool("GPU forward memory", (mbs.used[0] << 20), &gpu_mem); + pools[1] = new AlignedMemoryPool("GPU backward memory", (mbs.used[1] << 20), &gpu_mem); + pools[2] = new AlignedMemoryPool("GPU parameter memory", (mbs.used[2] << 20), &gpu_mem); +} + +Device_GPU::~Device_GPU() {} +#endif + +Device_CPU::Device_CPU(int my_id, const DeviceMempoolSizes & mbs, bool shared) : + Device(my_id, DeviceType::CPU, &cpu_mem), shmem(mem) { + if (shared) shmem = new SharedAllocator(); + kSCALAR_MINUSONE = (float*) mem->malloc(sizeof(float)); + *kSCALAR_MINUSONE = -1; + kSCALAR_ONE = (float*) mem->malloc(sizeof(float)); + *kSCALAR_ONE = 1; + kSCALAR_ZERO = (float*) mem->malloc(sizeof(float)); + *kSCALAR_ZERO = 0; + + // Initialize the Eigen device + edevice = new Eigen::DefaultDevice; + + // this is the big memory allocation. 
+ pools[0] = new AlignedMemoryPool("CPU forward memory", (mbs.used[0] << 20), &cpu_mem); + pools[1] = new AlignedMemoryPool("CPU backward memory", (mbs.used[1] << 20), &cpu_mem); + pools[2] = new AlignedMemoryPool("CPU parameter memory", (mbs.used[2] << 20), shmem); +} + +Device_CPU::~Device_CPU() {} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/devices.h b/thirdparty/dynet/dynet/devices.h new file mode 100644 index 000000000..a67b49778 --- /dev/null +++ b/thirdparty/dynet/dynet/devices.h @@ -0,0 +1,80 @@ +#ifndef DYNET_DEVICES_H +#define DYNET_DEVICES_H + +#include +#include "dynet/aligned-mem-pool.h" +#include "dynet/cuda.h" + +namespace Eigen { + struct DefaultDevice; + class CudaStreamDevice; + struct GpuDevice; +} + +namespace dynet { + +enum class DeviceType {CPU, GPU}; +enum class DeviceMempool {FXS = 0, DEDFS = 1, PS = 2, NONE = 3}; + +struct ComputationGraph; // TODO is there a nicer way to resolve this cyclic dependency? +struct Tensor; + +struct DeviceMempoolSizes { + size_t used[3]; + DeviceMempoolSizes() = default; + DeviceMempoolSizes(size_t total_s); + DeviceMempoolSizes(size_t fxs_s, size_t dEdfs_s, size_t ps_s); + DeviceMempoolSizes(const std::string & descriptor); +}; + + +class Device { + protected: + Device(int i, DeviceType t, MemAllocator* m) : device_id(i), type(t), mem(m), pools(3, nullptr) {} + Device(const Device&) = delete; + Device& operator=(const Device&) = delete; + virtual ~Device(); + public: + int device_id; + DeviceType type; + MemAllocator* mem; + float* kSCALAR_MINUSONE; + float* kSCALAR_ONE; + float* kSCALAR_ZERO; + std::string name; + virtual DeviceMempoolSizes mark(ComputationGraph *cg); + virtual void revert(const DeviceMempoolSizes & cp); + void allocate_tensor(DeviceMempool mem_pool, Tensor & tensor); + std::vector pools; +}; + +#if HAVE_CUDA +class Device_GPU : public Device { + public: + typedef Eigen::CudaStreamDevice EigenDevice; + explicit Device_GPU(int my_id, const DeviceMempoolSizes & mb, int 
device_id); + ~Device_GPU(); + int cuda_device_id; + cublasHandle_t cublas_handle; +#if HAVE_CUDNN + cudnnHandle_t cudnnHandle; +#endif + Eigen::GpuDevice* edevice; + Eigen::CudaStreamDevice* estream; + GPUAllocator gpu_mem; +}; +#endif + +class Device_CPU : public Device { + public: + typedef Eigen::DefaultDevice EigenDevice; + explicit Device_CPU(int my_id, const DeviceMempoolSizes & mb, bool shared); + ~Device_CPU(); + CPUAllocator cpu_mem; + Eigen::DefaultDevice* edevice; + MemAllocator* shmem; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/dict.cc b/thirdparty/dynet/dynet/dict.cc new file mode 100644 index 000000000..22f625836 --- /dev/null +++ b/thirdparty/dynet/dynet/dict.cc @@ -0,0 +1,48 @@ +#include "dict.h" + +#include +#include +#include + +using namespace std; + +namespace dynet { + +std::vector read_sentence(const std::string& line, Dict& sd) { + std::istringstream in(line); + std::string word; + std::vector res; + while(in) { + in >> word; + if (!in || word.empty()) break; + res.push_back(sd.convert(word)); + } + return res; +} + +void read_sentence_pair(const std::string& line, std::vector& s, Dict& sd, std::vector& t, Dict& td) { + std::istringstream in(line); + std::string word; + std::string sep = "|||"; + Dict* d = &sd; + std::vector* v = &s; + while(in) { + in >> word; + if (!in) break; + if (word == sep) { d = &td; v = &t; continue; } + v->push_back(d->convert(word)); + } +} + +#if BOOST_VERSION >= 105600 + DYNET_SERIALIZE_COMMIT(Dict, DYNET_SERIALIZE_DEFINE(frozen, map_unk, unk_id, words_, d_)) +#else + template + void Dict::serialize(Archive& ar, const unsigned int) { + throw std::invalid_argument("Serializing dictionaries is only supported on versions of boost 1.56 or higher"); + } +#endif +DYNET_SERIALIZE_IMPL(Dict) + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/dict.h b/thirdparty/dynet/dynet/dict.h new file mode 100644 index 000000000..62b79cef1 --- /dev/null +++ b/thirdparty/dynet/dynet/dict.h @@ 
-0,0 +1,89 @@ +#ifndef DYNET_DICT_H_ +#define DYNET_DICT_H_ + +#include +#include +#include +#include +#include + +#include "dynet/io-macros.h" +#include "dynet/except.h" + +namespace boost { namespace serialization { class access; } } + +namespace dynet { + +class Dict { +typedef std::unordered_map Map; +public: + Dict() : frozen(false), map_unk(false), unk_id(-1) { + } + + inline unsigned size() const { return words_.size(); } + + inline bool contains(const std::string& words) { + return !(d_.find(words) == d_.end()); + } + + void freeze() { frozen = true; } + bool is_frozen() { return frozen; } + + inline int convert(const std::string& word) { + auto i = d_.find(word); + if (i == d_.end()) { + if (frozen) { + if (map_unk) + return unk_id; + else + DYNET_RUNTIME_ERR("Unknown word encountered in frozen dictionary: " << word); + } + words_.push_back(word); + return d_[word] = words_.size() - 1; + } else { + return i->second; + } + } + + inline const std::string& convert(const int& id) const { + DYNET_ARG_CHECK(id < (int)words_.size(), + "Out-of-bounds error in Dict::convert for word ID " << id << + " (dict size: " << words_.size() << ")"); + return words_[id]; + } + + void set_unk(const std::string& word) { + if (!frozen) + DYNET_RUNTIME_ERR("Please call set_unk() only after dictionary is frozen"); + if (map_unk) + DYNET_RUNTIME_ERR("Set UNK more than one time"); + + // temporarily unfrozen the dictionary to allow the add of the UNK + frozen = false; + unk_id = convert(word); + frozen = true; + + map_unk = true; + } + + int get_unk_id() const { return unk_id; } + const std::vector & get_words() const { return words_; } + + void clear() { words_.clear(); d_.clear(); } + +private: + bool frozen; + bool map_unk; // if true, map unknown word to unk_id + int unk_id; + std::vector words_; + Map d_; + + DYNET_SERIALIZE_DECLARE() +}; + +std::vector read_sentence(const std::string& line, Dict& sd); +void read_sentence_pair(const std::string& line, std::vector& s, Dict& sd, 
std::vector& t, Dict& td); + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/dim.cc b/thirdparty/dynet/dynet/dim.cc new file mode 100644 index 000000000..7178f701c --- /dev/null +++ b/thirdparty/dynet/dynet/dim.cc @@ -0,0 +1,30 @@ +#include "dynet/dim.h" + +#include + +using namespace std; + +namespace dynet { + +ostream& operator<<(ostream& os, const Dim& d) { + os << '{'; + for (unsigned i = 0; i < d.nd; ++i) { + if (i) os << ','; + os << d.d[i]; + } + if(d.bd != 1) os << 'X' << d.bd; + return os << '}'; +} + +ostream& operator<<(ostream& os, const vector& ds) { + os << '['; + for (unsigned i = 0; i < ds.size(); ++i) + os << (i ? " " : "") << ds[i]; + return os << ']'; +} + +DYNET_SERIALIZE_COMMIT(Dim, DYNET_SERIALIZE_DEFINE(nd, d)) +DYNET_SERIALIZE_IMPL(Dim) + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/dim.h b/thirdparty/dynet/dynet/dim.h new file mode 100644 index 000000000..e472f7177 --- /dev/null +++ b/thirdparty/dynet/dynet/dim.h @@ -0,0 +1,323 @@ +/** + * \defgroup dim dim + * \ingroup dim + * \file dim.h + * \brief Dynet's way of implementing minibatching + */ + +#ifndef DYNET_DIM_H +#define DYNET_DIM_H + +#include +#include +#include +#include +#include +#include + +#include "dynet/io-macros.h" +#include "dynet/except.h" + +/** + * \ingroup dim + * Maximum number of dimensions supported by dynet : 7 + */ +#define DYNET_MAX_TENSOR_DIM 7 + +namespace boost { namespace serialization { class access; } } + +namespace dynet { + +/** + * \ingroup dim + * \struct Dim + * \brief The Dim struct stores information about the dimensionality of expressions. + * \details Batch dimension is treated separately from standard dimension. + */ +struct Dim { + /** + * \brief Default constructor + */ + Dim() : nd(0), bd(1) {} + // explicit Dim(unsigned int m) : nd(1), bd(1) { d[0] = m; } + // TODO: The constructors for dimensions w/ and w/o batches is not intuitive. + // can this be fixed in some way? 
+ // Dim(unsigned int m, unsigned int n) : nd(2), bd(1) { d[0] = m; d[1] = n; } + /** + * \brief Initialize from a list of dimensions + * \details The batch dimension is 1 in this case (non-batched expression) + * + * \param x List of dimentions + */ + Dim(std::initializer_list x) : nd(0), bd(1) { + for (auto v : x) d[nd++] = v; + } + /** + * \brief Initialize from a list of dimensions and a batch size + * + * \param x List of dimentions + * \param b Batch size + */ + Dim(std::initializer_list x, unsigned int b) : nd(0), bd(b) { + for (auto v : x) d[nd++] = v; + } + /** + * \brief Initialize from a vector of dimensions + * \details The batch dimension is 1 in this case (non-batched expression) + * + * \param x Array of dimentions + */ + Dim(const std::vector & x) : nd(0), bd(1) { + for (auto v : x) d[nd++] = v; + } + /** + * \brief Initialize from a vector of dimensions and a batch size + * + * \param x Vector of dimentions + * \param b Batch size + */ + Dim(const std::vector & x, unsigned int b) : nd(0), bd(b) { + for (auto v : x) d[nd++] = v; + } + /** + * \brief Total size of a batch + * \return Batch size * size of a batch + */ + inline unsigned int size() const { + return batch_size() * bd; + } + /** + * \brief Size of a batch (product of all dimensions) + * \return Size of a batch + */ + inline unsigned int batch_size() const { + unsigned int p = 1; + for (unsigned int i = 0; i < nd; ++i) p *= d[i]; + return p; + } + /** + * \brief Sum of all dimensions within a batch + * \return Sum of the dimensions within a batch + */ + inline unsigned int sum_dims() const { + unsigned int p = 0; + for (unsigned int i = 0; i < nd; ++i) p += d[i]; + return p; + } + /** + * \brief [TODO] + * \details [long description] + * \return [description] + */ + inline Dim truncate() const { + Dim r = *this; + unsigned int m = 1; + unsigned int s = size(); + for (unsigned int i = 1; i < s; ++i) + if (size(i) > 1) m = i + 1; + r.resize(m); + return r; + } + /** + * \brief Set the batch 
dimension to 1 + * \return 1-batch version of this instance + */ + inline Dim single_batch() const { + Dim r = *this; + r.bd = 1; + return r; + } + /** + * \brief Change the number of dimensions + * + * \param int New number of dimensions + */ + inline void resize(unsigned int i) { + while(nd < i) + d[nd++] = 1; + nd = i; + } + /** + * \brief Get number of dimensions + * \return Number of dimensions + */ + inline unsigned int ndims() const { return nd; } + /** + * \brief Size of the first dimension + * \return Size of the first dimension + */ + inline unsigned int rows() const { return d[0]; } + /** + * \brief Number of non-one dimensions + * \return Number of non-one dimensions + */ + inline unsigned int num_nonone_dims() const { + int ret = 0; + for(size_t i = 0; i < nd; ++i) + if(d[i] != 1) + ++ret; + return ret; + } + /** + * \brief Size of the second dimension (or 1 if only one dimension) + * \return Size of the second dimension (or 1 if only one dimension) + */ + inline unsigned int cols() const { return nd > 1 ? d[1] : 1; } + /** + * \brief Batch dimension + * \return Batch dimension + */ + inline unsigned int batch_elems() const { return bd; } + /** + * \brief Set specific dimension + * \details Set the value of a specific dimension to an arbitrary value + * + * \param i Dimension index + * \param s Dimension size + */ + inline void set(unsigned int i, unsigned int s) { + DYNET_ARG_CHECK(i < nd || s == 1, "Out of bounds exception in Dim::set(" << i << "," << s << ") for node of size " << d); + DYNET_ARG_CHECK(s != 0, "Attempt to set dimension size to zero in Dim::set(" << i << "," << s << ") for node of size " << d); + d[i] = s; + } + /** + * \brief Access a specific dimension as you would access an array element + * + * \param i Dimension index + * \return Size of dimension i + */ + inline unsigned int operator[](unsigned int i) const { return i < nd ? 
d[i] : 1; } + /** + * \brief Size of dimension i + * + * \param i Dimension index + * \return Size of dimension i + */ + inline unsigned int size(unsigned int i) const { return (*this)[i]; } + /** + * \brief Remove one of the dimensions + * \param i index of the dimension to be removed + */ + inline void delete_dim(unsigned int i) { + DYNET_ARG_CHECK(i < nd, "Out of bounds exception in Dim::delete_dim(" << i << ") for node of size " << d ); + if(i == nd-1){ + if(nd == 1){ + d[0] = 1; + } + else{ + --nd; + } + } + else{ + for(; i + 1 < nd; ++i){ + d[i] = d[i + 1]; + } + --nd; + } + } + /** + * \brief Remove multi-dimensions + * \param dims dimensions to be removed + * \param reduce_batch reduce the batch dimension or not + */ + inline void delete_dims(std::vector dims, bool reduce_batch){ + std::vector deleted_dims(nd, false); + + for(unsigned int i = 0; i < dims.size(); i++) { + DYNET_ARG_CHECK(dims[i] < nd, "Out of bounds exception in Dim::delete_dims"); + deleted_dims[dims[i]] = true; + } + + if(dims.size() == nd) { + nd = 1; + d[0] = 1; + } else { + int flag = 0; + for(unsigned int i = 0; i < nd; i++) { + if(!deleted_dims[i]) + d[flag++] = d[i]; + } + nd = flag; + } + + if(reduce_batch) + bd = 1; + } + /** + * \brief Insert a dimension + * \param i the index before which to insert the new dimension + * \param n the size of the new dimension + */ + inline void insert_dim(unsigned int i, unsigned int n) { + DYNET_ARG_CHECK(i <= nd, "Out of bounds exception in Dim::delete_dim(" << i << ") for node of size " << d); + if (nd == 1) { + d[0] = 1; + } else { + for (; i + 1 < nd; ++i) + d[i] = d[i + 1]; + --nd; + } + } + /** + * \brief Transpose a vector or a matrix + * \details This raises an invalid_argument exception on tensors with more than 2 dimensions + * \return The transposed Dim structure + */ + inline Dim transpose() const { + if (nd == 1) { return Dim({1, d[0]}, bd); } + else { + DYNET_ARG_CHECK(nd == 2, "Cannot transpose Dim object with more than 2 
dimensions, but got " << d); + return Dim({d[1], d[0]}, bd); + } + } + + unsigned int d[DYNET_MAX_TENSOR_DIM]; /**< Array of dimension */ + unsigned int nd; /**< Number of dimensions */ + unsigned int bd; /**< Batch dimension */ +private: + DYNET_SERIALIZE_DECLARE() +}; + +/** + * \brief Check for equality between two Dim + * \details Two Dim struct are considered equal if their dimensions and batch size are equal + * + * \param a First Dim + * \param b Second Dim + * + * \return a==b + */ +inline bool operator==(const Dim& a, const Dim& b) { + if (a.nd != b.nd || a.bd != b.bd) return false; + return std::memcmp(a.d, b.d, a.nd) == 0; +} + +/** + * \brief Check for inequality of two Dim structs + * \details See equality + * + * \param a First Dim + * \param b Second Dim + * + * \return a!=b + */ +inline bool operator!=(const Dim& a, const Dim& b) { return !(a == b); } + +/** + * \brief Print Dim to output stream + * + * \param os Output stream + * \param d Dim + */ +std::ostream& operator<<(std::ostream& os, const Dim& d); +/** + * \brief Print vector of Dims to output stream + * + * \param os Output stream + * \param ds vector of Dims + */ +std::ostream& operator<<(std::ostream& os, const std::vector& ds); + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/dynet-helper.h b/thirdparty/dynet/dynet/dynet-helper.h new file mode 100644 index 000000000..b98e05d00 --- /dev/null +++ b/thirdparty/dynet/dynet/dynet-helper.h @@ -0,0 +1,25 @@ +#ifndef DYNET_HELPER_H_ +#define DYNET_HELPER_H_ + +#include + +/// helper functions + +namespace dynet { + +/** + this fix a compilation problem in cygwin +*/ +#if defined(__CYGWIN__) + template + inline std::string to_string(T value) + { + std::ostringstream os; + os << value; + return os.str(); + } +#endif + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/dynet.cc b/thirdparty/dynet/dynet/dynet.cc new file mode 100644 index 000000000..466327cf0 --- /dev/null +++ b/thirdparty/dynet/dynet/dynet.cc 
@@ -0,0 +1,353 @@ +#include "dynet/dynet.h" + +#include "dynet/exec.h" +#include "dynet/nodes.h" +#include "dynet/param-nodes.h" +#include "dynet/aligned-mem-pool.h" +#include "dynet/dynet-helper.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + +float* kSCALAR_MINUSONE; +float* kSCALAR_ONE; +float* kSCALAR_ZERO; +int n_hgs = 0; +unsigned n_cumul_hgs = 0; + +int get_number_of_active_graphs() {return n_hgs;}; +unsigned get_current_graph_id() {return n_cumul_hgs;}; + +Node::~Node() {} +size_t Node::aux_storage_size() const { return 0; } + +// perform the forward/backward passes in one or multiple calls +// TODO: This is a lot of code for something simple. Can it be shortened? +void Node::forward(const std::vector& xs, + Tensor& fx) const { + if (this->supports_multibatch() || fx.d.batch_elems() == 1) { + forward_impl(xs, fx); + } else { + size_t i; + std::vector xs_elems(xs.size()); + std::vector xs_ptrs(xs.size()); + std::vector xs_sizes(xs.size()); + for (i = 0; i < xs.size(); ++i) { + xs_elems[i] = xs[i]->batch_elem(0); + xs_ptrs[i] = &xs_elems[i]; + xs_sizes[i] = xs_elems[i].d.size(); + } + Tensor fx_elem(fx.batch_elem(0)); + size_t fx_size = fx_elem.d.size(); + forward_impl(xs_ptrs, fx_elem); + for (unsigned b = 1; b < fx.d.batch_elems(); ++b) { + for (i = 0; i < xs.size(); ++i) + if (xs[i]->d.bd > 1) + xs_elems[i].v += xs_sizes[i]; + fx_elem.v += fx_size; + forward_impl(xs_ptrs, fx_elem); + } + } +} + +void Node::backward(const std::vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned xs_i, + Tensor& dEdxi) const { + if (this->supports_multibatch() || fx.d.batch_elems() == 1) { + backward_impl(xs, fx, dEdf, xs_i, dEdxi); + } else { + size_t i; + std::vector xs_elems(xs.size()); + std::vector xs_ptrs(xs.size()); + std::vector xs_sizes(xs.size()); + for (i = 0; i < xs.size(); ++i) { + xs_elems[i] = xs[i]->batch_elem(0); + xs_ptrs[i] = &xs_elems[i]; + xs_sizes[i] = xs_elems[i].d.size(); + } + Tensor fx_elem(fx.batch_elem(0)); + 
size_t fx_size = fx_elem.d.size(); + Tensor dEdf_elem(dEdf.batch_elem(0)); + size_t dEdf_size = dEdf_elem.d.size(); + Tensor dEdxi_elem(dEdxi.batch_elem(0)); + size_t dEdxi_size = dEdxi_elem.d.size(); + backward_impl(xs_ptrs, fx_elem, dEdf_elem, xs_i, dEdxi_elem); + for (unsigned b = 1; b < fx.d.batch_elems(); ++b) { + for (i = 0; i < xs.size(); ++i) + if (xs[i]->d.bd > 1) + xs_elems[i].v += xs_sizes[i]; + fx_elem.v += fx_size; + dEdf_elem.v += dEdf_size; + if (dEdxi.d.bd > 1) + dEdxi_elem.v += dEdxi_size; + backward_impl(xs_ptrs, fx_elem, dEdf_elem, xs_i, dEdxi_elem); + } + } +} + +ComputationGraph::ComputationGraph(): + ee(new SimpleExecutionEngine(*this)) { + if (n_hgs > 0) { + cerr << "Memory allocator assumes only a single ComputationGraph at a time.\n"; + throw std::runtime_error("Attempted to create >1 CG"); + } + ++n_hgs; + immediate_compute = false; + check_validity = false; + ++n_cumul_hgs; + graph_id = n_cumul_hgs; +} + +ComputationGraph::~ComputationGraph() { + this->clear(); + delete ee; + --n_hgs; +} + +void ComputationGraph::clear() { + parameter_nodes.clear(); + for (auto n : nodes) delete n; + nodes.clear(); +} + +CGCheckpoint ComputationGraph::_get_checkpoint() { + CGCheckpoint p; + p.device_mem_checkpoint = default_device->mark(this); + p.node_idx = nodes.size(); + p.par_node_idx = parameter_nodes.size(); + return p; +} + +void ComputationGraph::_revert(CGCheckpoint p) { + default_device->revert(p.device_mem_checkpoint); + // clear all nodes at position >= p.node_idx + if ((int)nodes.size() > p.node_idx) { + nodes.resize(p.node_idx); // TODO verify deletion of nodes. 
+ ee->invalidate(p.node_idx - 1); // clear precomputed forward values + } + // clear all parameter nodes at position >= p.par_node_idx + if ((int)parameter_nodes.size() > p.par_node_idx) { + parameter_nodes.resize(p.par_node_idx); + } +} + +void ComputationGraph::checkpoint() { + checkpoints.push_back(_get_checkpoint()); +} + +void ComputationGraph::revert() { + if (checkpoints.size() == 0) return; + _revert(checkpoints.back()); + checkpoints.pop_back(); +} + +Dim& ComputationGraph::get_dimension(VariableIndex index) const { + return nodes[index]->dim; +} + + + +VariableIndex ComputationGraph::add_input(real s) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new ScalarInputNode(s)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_input(const real* ps) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new ScalarInputNode(ps)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_input(const Dim& d, const vector& pm) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new InputNode(d, pm)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_input(const Dim& d, const vector* pm) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new InputNode(d, pm)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_input(const Dim& d, const vector& ids, const vector& data, float defdata) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new SparseInputNode(d, ids, data, defdata)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_parameters(Parameter p) { + VariableIndex new_node_index(nodes.size()); + ParameterNode* new_node = new ParameterNode(p); + nodes.push_back(new_node); + parameter_nodes.push_back(new_node_index); + 
set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_parameters(LookupParameter p) { + VariableIndex new_node_index(nodes.size()); + ParameterNode* new_node = new ParameterNode(p); + nodes.push_back(new_node); + parameter_nodes.push_back(new_node_index); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_const_parameters(Parameter p) { + VariableIndex new_node_index(nodes.size()); + ConstParameterNode* new_node = new ConstParameterNode(p); + nodes.push_back(new_node); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_const_parameters(LookupParameter p) { + VariableIndex new_node_index(nodes.size()); + ConstParameterNode* new_node = new ConstParameterNode(p); + nodes.push_back(new_node); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_lookup(LookupParameter p, const unsigned* pindex) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, pindex); + nodes.push_back(new_node); + parameter_nodes.push_back(new_node_index); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_lookup(LookupParameter p, unsigned index) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, index); + nodes.push_back(new_node); + parameter_nodes.push_back(new_node_index); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_lookup(LookupParameter p, const std::vector& indices) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, indices); + nodes.push_back(new_node); + parameter_nodes.push_back(new_node_index); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_lookup(LookupParameter p, const 
std::vector* indices) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, indices); + nodes.push_back(new_node); + parameter_nodes.push_back(new_node_index); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + + +VariableIndex ComputationGraph::add_const_lookup(LookupParameter p, const unsigned* pindex) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, pindex); + // get rid of the following in favor of using parameter_nodes to see the needs_derivative + // expression + nodes.push_back(new_node); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_const_lookup(LookupParameter p, unsigned index) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, index); + nodes.push_back(new_node); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_const_lookup(LookupParameter p, const std::vector& indices) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, indices); + nodes.push_back(new_node); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +VariableIndex ComputationGraph::add_const_lookup(LookupParameter p, const std::vector* indices) { + VariableIndex new_node_index(nodes.size()); + LookupNode* new_node = new LookupNode(p, indices); + nodes.push_back(new_node); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +// factory function should call this right after creating a new node object +// to set its dimensions properly +void ComputationGraph::set_dim_for_new_node(const VariableIndex& i) { + Node* node = nodes[i]; + vector xds(node->arity()); + unsigned ai = 0; + for (VariableIndex arg : node->args) { + xds[ai] = nodes[arg]->dim; + ++ai; + } + node->dim = node->dim_forward(xds); + node->set_cg(this); + if (immediate_compute) { + const Tensor& value = 
incremental_forward(i); + if (check_validity) + if (!value.is_valid()) { + cerr << "NaN or Inf detected\n"; + throw std::runtime_error("NaN or Inf detected"); + } + } +} + +const Tensor& ComputationGraph::incremental_forward(const expr::Expression& last) { return ee->incremental_forward(last.i); } +const Tensor& ComputationGraph::forward(const expr::Expression& last) { return ee->forward(last.i); } +const Tensor& ComputationGraph::incremental_forward(VariableIndex last) { return ee->incremental_forward(last); } +const Tensor& ComputationGraph::forward(VariableIndex last) { return ee->forward(last); } +const Tensor& ComputationGraph::get_value(VariableIndex i) { return ee->get_value(i); } +const Tensor& ComputationGraph::get_value(const expr::Expression& e) { return this->get_value(e.i); } +const Tensor& ComputationGraph::get_gradient(VariableIndex i) { return ee->get_gradient(i); } +const Tensor& ComputationGraph::get_gradient(const expr::Expression& e) { return this->get_gradient(e.i); } +void ComputationGraph::invalidate() { ee->invalidate(); } +void ComputationGraph::backward(const expr::Expression& last, bool full) { ee->backward(last.i, full); } +void ComputationGraph::backward(VariableIndex i, bool full) { ee->backward(i, full); } + +void ComputationGraph::set_immediate_compute(bool ic) { + immediate_compute = ic; +} + +void ComputationGraph::set_check_validity(bool cv) { + check_validity = cv; +} + +void ComputationGraph::print_graphviz() const { + cerr << "digraph G {\n rankdir=LR;\n nodesep=.05;\n"; + unsigned nc = 0; + for (auto node : nodes) { + vector var_names; + for (auto arg : node->args) + var_names.push_back(string("v") + to_string((unsigned)arg)); + cerr << " N" << nc << " [label=\"v" << nc << " = " + << node->as_string(var_names) << "\"];\n"; + for (auto arg : node->args) + cerr << " N" << ((unsigned)arg) << " -> N" << nc << ";\n"; + ++nc; + } + cerr << "}\n"; +} + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/dynet.h 
b/thirdparty/dynet/dynet/dynet.h new file mode 100644 index 000000000..39196cb37 --- /dev/null +++ b/thirdparty/dynet/dynet/dynet.h @@ -0,0 +1,633 @@ +/** + * \file dynet.h + * \defgroup compgraph compgraph + * \defgroup nodes nodes + */ +#ifndef DYNET_DYNET_H_ +#define DYNET_DYNET_H_ + +#include +#include +#include +#include +#include + +#include + +#include "dynet/init.h" +#include "dynet/aligned-mem-pool.h" +#include "dynet/tensor.h" +#include "dynet/model.h" +#include "dynet/devices.h" + + +namespace dynet { + +extern float* kSCALAR_MINUSONE; +extern float* kSCALAR_ONE; +extern float* kSCALAR_ZERO; + +/** + * \ingroup compgraph + * \brief Gets the number of active graphs + * \details This is 0 or 1, you can't create more than one graph at once + * \return Number of active graphs + */ +int get_number_of_active_graphs(); +/** + * \ingroup compgraph + * \brief Get id of the current active graph + * \details This can help check whether a graph is stale + * \return Id of the current graph + */ +unsigned get_current_graph_id(); + +// devices provide information about GPUs and CPUs +// these include any API information that is required to make calls +// to the GPU as well as the memory pools for the device +// Device is not copyable, so you can use the pointer to uniquely +// identify the device +//extern std::vector devices; // [0] is always the CPU +extern Device* default_device; // where parameters go by default + +class ExecutionEngine; +struct ParameterNodeBase; +struct Node; +namespace expr { struct Expression; } + +BOOST_STRONG_TYPEDEF(unsigned, VariableIndex) + +struct CGCheckpoint { + int node_idx; + int par_node_idx; + DeviceMempoolSizes device_mem_checkpoint; +}; + +inline void swap(VariableIndex& i1, VariableIndex& i2) { + VariableIndex t = i1; + i1 = i2; + i2 = t; +} + +/** + * \ingroup compgraph + * \brief Computation graph where nodes represent forward and backward intermediate values, and edges represent functions of multiple values. 
+ * \details To represent the fact that a function may have multiple arguments, edges have a single head and 0, 1, 2, or more tails. (Constants, inputs, and parameters are represented as functions of 0 parameters.) + * Example: given the function z = f(x, y), z, x, and y are nodes, and there is an edge representing f with which points to the z node (i.e., its head), and x and y are the tails of the edge. + * You shouldn't need to use most methods from the ComputationGraph except for `backward` since most of them are available directly from the Expression class. + */ +struct ComputationGraph { + /** + * \brief Default constructor + */ + ComputationGraph(); + ~ComputationGraph(); + + // INPUTS + /** + * \brief Add scalar input + * \details The computational network will pull inputs in from the user's data structures and make them available to the computation + * + * \param s Real number + * \return The index of the created variable + */ + VariableIndex add_input(real s); // + /** + * \brief Add scalar input by pointer + * \details The computational network will pull inputs in from the user's data structures and make them available to the computation + * + * \param ps Pointer to a real number + * \return The index of the created variable + */ + VariableIndex add_input(const real* ps); // add pointer to scalar + /** + * \brief Add multidimentsional input + * \details The computational network will pull inputs in from the user's data structures and make them available to the computation + * + * \param d Desired shape of the input + * \param data Input data (as a 1 dimensional array) + * \return The index of the created variable + */ + VariableIndex add_input(const Dim& d, const std::vector& data); + /** + * \brief Add multidimentsional input by pointer + * \details The computational network will pull inputs in from the user's data structures and make them available to the computation + * + * \param d Desired shape of the input + * \param pdata Pointer to the input data 
(as a 1 dimensional array) + * \return The index of the created variable + */ + VariableIndex add_input(const Dim& d, const std::vector* pdata); + /** + * \brief Add sparse input + * \details The computational network will pull inputs in from the user's data structures and make them available to the computation. Represents specified (not learned) inputs to the network in sparse array format, with an optional default value. + * + * \param d Desired shape of the input + * \param ids The indexes of the data points to update + * \param data The data points corresponding to each index + * \param defdata The default data with which to set the unspecified data points + * \return The index of the created variable + */ + VariableIndex add_input(const Dim& d, const std::vector& ids, const std::vector& data, float defdata = 0.f); + + // PARAMETERS + // parameters are things that are optimized. in contrast to a system like + // Torch where computational modules may have their own parameters, in DYNET + // parameters are just parameters + /** + * \brief Add a parameter to the computation graph + * + * \param p Parameter to be added + * \return The index of the created variable + */ + VariableIndex add_parameters(Parameter p); + /** + * \brief Add a full matrix of lookup parameters to the computation graph + * + * \param p LookupParameter to be added + * \return The index of the created variable + */ + VariableIndex add_parameters(LookupParameter p); + /** + * \brief Add a parameter to the computation graph (but don't update) + * + * \param p Parameter to be added + * \return The index of the created variable + */ + VariableIndex add_const_parameters(Parameter p); + /** + * \brief Add a full matrix of lookup parameter to the computation graph (but don't update) + * + * \param p LookupParameter to be added + * \return The index of the created variable + */ + VariableIndex add_const_parameters(LookupParameter p); + // use pindex to point to a memory location where the index will 
live + // that the caller owns + /** + * \brief Add a lookup parameter to the computation graph + * \details Use pindex to point to a memory location where the index will live that the caller owns + * + * \param p Lookup parameter from which to pick + * \param pindex Pointer to the index to lookup + * + * \return The index of the created variable + */ + VariableIndex add_lookup(LookupParameter p, const unsigned* pindex); + /** + * \brief Add a lookup parameter to the computation graph + * + * \param p Lookup parameter from which to pick + * \param index Index to lookup + * + * \return The index of the created variable + */ + VariableIndex add_lookup(LookupParameter p, unsigned index); + /** + * \brief Add lookup parameters to the computation graph + * \details Use pindices to point to a memory location where the indices will live that the caller owns + * + * \param p Lookup parameter from which to pick + * \param pindices Pointer to the indices to lookup + * + * \return The index of the created variable + */ + VariableIndex add_lookup(LookupParameter p, const std::vector* pindices); + /** + * \brief Add lookup parameters to the computation graph + * + * \param p Lookup parameter from which to pick + * \param indices Indices to lookup + * + * \return The index of the created variable + */ + VariableIndex add_lookup(LookupParameter p, const std::vector& indices); + // + /** + * \brief Add a lookup parameter to the computation graph + * \details Just like add_lookup, but don't optimize the lookup parameters + * + * \param p Lookup parameter from which to pick + * \param pindex Pointer to the indices to lookup + * + * \return The index of the created variable + */ + VariableIndex add_const_lookup(LookupParameter p, const unsigned* pindex); + /** + * \brief Add a lookup parameter to the computation graph + * \details Just like add_lookup, but don't optimize the lookup parameters + * + * \param p Lookup parameter from which to pick + * \param index Index to lookup + * + 
* \return The index of the created variable + */ + VariableIndex add_const_lookup(LookupParameter p, unsigned index); + /** + * \brief Add lookup parameters to the computation graph + * \details Just like add_lookup, but don't optimize the lookup parameters + * + * \param p Lookup parameter from which to pick + * \param pindices Pointer to the indices to lookup + * + * \return The index of the created variable + */ + VariableIndex add_const_lookup(LookupParameter p, const std::vector* pindices); + /** + * \brief Add lookup parameters to the computation graph + * \details Just like add_lookup, but don't optimize the lookup parameters + * + * \param p Lookup parameter from which to pick + * \param indices Indices to lookup + * + * \return The index of the created variable + */ + VariableIndex add_const_lookup(LookupParameter p, const std::vector& indices); + + // COMPUTATIONS + /** + * \brief Add a function to the computation graph + * \details This what is called when creating an expression + * + * \param arguments List of the arguments indices + * \tparam Function Function to be applied + * \return The index of the output variable + */ + template inline VariableIndex add_function(const std::initializer_list& arguments); + /** + * \brief Add a function to the computation graph (with side information) + * \details This what is called when creating an expression + * + * \param arguments List of the arguments indices + * \param side_information Side information that is needed to compute the function + * \tparam Function Function to be applied + * \return The index of the output variable + */ + template + inline VariableIndex add_function(const std::initializer_list& arguments, + Args&&... side_information); + template + inline VariableIndex add_function(const T& arguments); + template + inline VariableIndex add_function(const T& arguments, + Args&&... 
side_information); + + // reset ComputationGraph to a newly created state + /** + * \brief Reset ComputationGraph to a newly created state + * \details [long description] + */ + void clear(); + /** + * \brief Set a checkpoint + */ + void checkpoint(); + /** + * \brief Revert to last checkpoint + */ + void revert(); + + /** + * \brief Get dimension of a node + * + * \param index Variable index of the node + * \return Dimension + */ + Dim& get_dimension(VariableIndex index) const; + + + // perform computations + + // run complete forward pass from first node to given one, ignoring all precomputed values. + /** + * \brief Run complete forward pass from first node to given one, ignoring all precomputed values. + * + * \param last Expression up to which the forward pass must be computed + * \return Value of the `last` Expression after execution + */ + const Tensor& forward(const expr::Expression& last); + /** + * \brief Run complete forward pass from first node to given one, ignoring all precomputed values. + * + * \param i Variable index of the node up to which the forward pass must be computed + * \return Value of the end Node after execution + */ + const Tensor& forward(VariableIndex i); + /** + * \brief Run forward pass from the last computed node to given one. + * \details Useful if you want to add nodes and evaluate just the new parts. + * + * \param last Expression up to which the forward pass must be computed + * \return Value of the `last` Expression after execution + */ + const Tensor& incremental_forward(const expr::Expression& last); + /** + * \brief Run forward pass from the last computed node to given one. + * \details Useful if you want to add nodes and evaluate just the new parts. + * + * \param i Variable index of the node up to which the forward pass must be computed + * \return Value of the end Node after execution + */ + const Tensor& incremental_forward(VariableIndex i); + /** + * \brief Get forward value for node at index i. 
+ * \details Performs forward evaluation if not available (may compute more than strictly what is needed). + * + * \param i Index of the variable from which you want the value + * \return Requested value + */ + const Tensor& get_value(VariableIndex i); + /** + * \brief Get forward value for the given expression + * \details Performs forward evaluation if not available (may compute more than strictly what is needed). + * + * \param e Expression from which you want the value + * \return Requested value + */ + const Tensor& get_value(const expr::Expression& e); + + /** + * \brief Get gradient for node at index i. + * \details Performs backward pass if not available (may compute more than strictly what is needed). + * + * \param i Index of the variable from which you want the gradient + * \return Requested gradient + */ + const Tensor& get_gradient(VariableIndex i); + /** + * \brief Get gradient for the given expression + * \details Performs backward pass if not available (may compute more than strictly what is needed). + * + * \param e Expression from which you want the gradient + * \return Requested gradient + */ + const Tensor& get_gradient(const expr::Expression& e); + /** + * \brief Clears forward caches (for get_value etc). + */ + void invalidate(); + /** + * \brief Computes backward gradients from the front-most evaluated node. + * + * \details The parameter `full` specifies whether the gradients should be computed for all nodes (`true`) or only non-constant nodes. + * + * By default, a node is constant unless + * + * 1. it is a parameter node + * 2. it depends on a non-constant node + * + * Thus, functions of constants and inputs are considered as constants. + * + * Turn `full` on if you want to retrieve gradients w.r.t. inputs for instance. By default this is turned off, so that the backward pass ignores nodes which have no influence on gradients w.r.t. parameters for efficiency. 
+ * + * \param last Expression from which to compute the gradient + * \param full Whether to compute all gradients (including with respect to constant nodes). + */ + void backward(const expr::Expression& last, bool full = false); + /** + * \brief Computes backward gradients from node i (assuming it has already been evaluated). + * + * \details The parameter `full` specifies whether the gradients should be computed for all nodes (`true`) or only non-constant nodes. + * + * By default, a node is constant unless + * + * 1. it is a parameter node + * 2. it depends on a non-constant node + * + * Thus, functions of constants and inputs are considered as constants. + * + * Turn `full` on if you want to retrieve gradients w.r.t. inputs for instance. By default this is turned off, so that the backward pass ignores nodes which have no influence on gradients w.r.t. parameters for efficiency. + * + * \param i Index of the node from which to compute the gradient + * \param full Whether to compute all gradients (including with respect to constant nodes). Turn this on if you want to retrieve gradients w.r.t. inputs for instance. By default this is turned off, so that the backward pass ignores nodes which have no influence on gradients w.r.t. parameters for efficiency. 
+ */ + void backward(VariableIndex i, bool full = false); + // set immediate_compute variable + void set_immediate_compute(bool ic); + // set check_validity variable + void set_check_validity(bool cv); + + /** + * \brief Used for debugging + */ + void print_graphviz() const; + + /** + * \brief Get the unique graph ID + * \details This ID is incremented by 1 each time a computation graph is created + * \return graph id + */ + unsigned get_id() const {return graph_id;}; + + // data + std::vector nodes; // **stored in topological order** + std::vector parameter_nodes; // nodes that contain parameters that can be updated (subset of nodes) + + ExecutionEngine* ee; // handles the execution +private: + unsigned graph_id; + // flag of whether to compute immediately for each expression, i.e., an imperative execution style to help debug. + bool immediate_compute; + // flag of checking Inf/NaN of each layer. Only performing checking when immediate_compute is also set to true. + bool check_validity; + void set_dim_for_new_node(const VariableIndex& i); + + std::vector checkpoints; + CGCheckpoint _get_checkpoint(); + void _revert(CGCheckpoint checkpoint); +}; + +// represents an SSA variable +// * in_edge is the **ordered** list of indices of the function arguments +// * fx is the computed value of the variable +// * dEdf is the derivative of the output with respect to the function +/** + * \ingroup nodes + * \brief Represents an SSA variable + * \details Contains information on the computation node : arguments, output value and gradient of the output with respect to the function. + * This class must be inherited to implement any new operation. See nodes.cc for examples. 
+ * An operation on expressions can then be created from the new Node, see expr.h/expr.cc for examples + */ +struct Node { + virtual ~Node(); + + /** + * \brief Compute dimensions of result for given dimensions of inputs + * \details Also checks to make sure inputs are compatible with each other + * + * \param xs Vector containing the dimensions of the inputs + * \return Dimension of the output + */ + virtual Dim dim_forward(const std::vector& xs) const = 0; + + // for debugging + /** + * \brief Returns important information for debugging + * \details See nodes-conv.cc for examples + * + * \param args String descriptions of the arguments + * \return String description of the node + */ + virtual std::string as_string(const std::vector& args) const = 0; + + // in general, this will return an empty size, but if a component needs to store + // extra information in the forward pass for use in the backward pass, it can + // request the memory here (nb. you could put it on the Node object, but in general, + // edges should not allocate tensor memory since memory is managed centrally for the + // entire computation graph). + /** + * \brief Size of the auxiliary storage + * \details in general, this will return an empty size, but if a component needs to store extra information in the forward pass for use in the backward pass, it can request the memory here (nb. you could put it on the Node object, but in general, edges should not allocate tensor memory since memory is managed centrally for the entire computation graph). + * \return Size + */ + virtual size_t aux_storage_size() const; + + + // computation + /** + * \brief Forward computation + * \details This function contains the logic for the forward pass. Some implementation remarks from nodes.cc: + * 1. fx can be understood as a pointer to the (preallocated) location for the result of forward to be stored + * 2. fx is not initialized, so after calling forward fx must point to the correct answer + * 3. 
fx can be repointed to an input, if forward(x) evaluates to x (e.g., in reshaping) + * 4. scalar results of forward are placed in fx.v[0] + * 5. DYNET manages its own memory, not Eigen, and it is configured with the EIGEN_NO_MALLOC option. If you get an error about Eigen attempting to allocate memory, it is (probably) because of an implicit creation of a temporary variable. To tell Eigen this is not necessary, the noalias() method is available. If you really do need a temporary variable, its capacity must be requested by Node::aux_storage_size + * + * Note on debugging problems with differentiable components + * + * - fx is uninitialized when forward is called- are you relying on it being 0? + * + * \param xs Pointers to the inputs + * \param fx pointer to the (preallocated) location for the result of forward to be stored + */ + virtual void forward_impl(const std::vector& xs, + Tensor& fx) const = 0; + // + /** + * \brief Accumulates the derivative of E with respect to the ith argument to f, that is, xs[i] + * \details This function contains the logic for the backward pass. Some implementation remarks from nodes.cc: + * 1. dEdxi MUST **ACCUMULATE** a result since multiple calls to forward may depend on the same x_i. Even, e.g., Identity must be implemented as dEdx1 += dEdf. THIS IS EXTREMELY IMPORTANT + * 2. scalar results of forward are placed in fx.v[0] + * 3. DYNET manages its own memory, not Eigen, and it is configured with the EIGEN_NO_MALLOC option. If you get an error about Eigen attempting to allocate memory, it is (probably) because of an implicit creation of a temporary variable. To tell Eigen this is not necessary, the noalias() method is available. If you really do need a temporary variable, its capacity must be requested by Node::aux_storage_size + * + * Note on debugging problems with differentiable components + * + * - dEdxi must accumulate (see point 1 above!) 
+ * + * \param xs Pointers to inputs + * \param fx Output + * \param dEdf Gradient of the objective w.r.t the output of the node + * \param i Index of the input w.r.t which we take the derivative + * \param dEdxi Gradient of the objective w.r.t the input of the node + */ + virtual void backward_impl(const std::vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const = 0; + + /** + * \brief Whether this node supports computing multiple batches in one call. + * \details If true, forward and backward will be called once with a multi-batch tensor. If false, forward and backward will be called multiple times for each item. + * \return Support for multibatch + */ + virtual bool supports_multibatch() const { return false; } + + // perform the forward/backward passes in one or multiple calls + /** + * \brief perform the forward/backward passes in one or multiple calls + * + * \param xs Pointers to the inputs + * \param fx pointer to the (preallocated) location for the result of forward to be stored + */ + virtual void forward(const std::vector& xs, + Tensor& fx) const final; + /** + * \brief perform the backward passes in one or multiple calls + * + * \param xs Pointers to inputs + * \param fx Output + * \param dEdf Gradient of the objective w.r.t the output of the node + * \param i Index of the input w.r.t which we take the derivative + * \param dEdxi Gradient of the objective w.r.t the input of the node + */ + virtual void backward(const std::vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const final; + + // + /** + * \brief Number of arguments to the function + * \return Arity of the function + */ + inline unsigned arity() const { return args.size(); } + + inline void set_cg(ComputationGraph* cg) { cg_ = cg; } + + inline ComputationGraph* get_cg() const { + if (cg_) return cg_; + else return NULL; + } + + std::vector args;/**< Dependency structure */ + + // memory size + Dim dim; /**< Will be 
.size() = 0 initially filled in by forward() -- TODO fix this */ + + Device* device; /**< pointer to the device, or null to inherit device from first input, or default when there is no input */ + +protected: + Node() : args(), device(default_device) {} + explicit Node(const std::initializer_list& a) : args(a), device(default_device) {} + template + explicit Node(const T&c) : args(c.begin(), c.end()), device(default_device) {} + +private: + ComputationGraph* cg_; // pointer to the computation graph + +public: + // auxiliary memory + mutable void* aux_mem; /**< this will usually be null. but, if your node needs to store intermediate values between forward and backward, you can store them here. request the number of bytes you need from aux_storage_size(). Note: this memory will be on the CPU or GPU, depending on your computation backend*/ +}; + +template +inline VariableIndex ComputationGraph::add_function(const std::initializer_list& arguments) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new Function(arguments)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +// pass side information to the function. these are likely to be nondifferentiable arguments +template +inline VariableIndex ComputationGraph::add_function(const std::initializer_list& arguments, + Args&&... side_information) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new Function(arguments, std::forward(side_information)...)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +template +inline VariableIndex ComputationGraph::add_function(const T& arguments) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new Function(arguments)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +// pass side information to the function. these are likely to be nondifferentiable arguments +template +inline VariableIndex ComputationGraph::add_function(const T& arguments, + Args&&... 
side_information) { + VariableIndex new_node_index(nodes.size()); + nodes.push_back(new Function(arguments, std::forward(side_information)...)); + set_dim_for_new_node(new_node_index); + return new_node_index; +} + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/except.h b/thirdparty/dynet/dynet/except.h new file mode 100644 index 000000000..fd8a6f210 --- /dev/null +++ b/thirdparty/dynet/dynet/except.h @@ -0,0 +1,65 @@ +#ifndef DYNET_EXCEPT_H_ +#define DYNET_EXCEPT_H_ + +#include +#include + +namespace dynet { + +// if DYNET exhausts its memory pool +class out_of_memory : public std::runtime_error { + public: + out_of_memory(const std::string& what_arg) : runtime_error(what_arg) {} +}; + +// this error occurs when some logic is +// attempted to execute on a CUDA backend but the +// logic has not been implemented. +class cuda_not_implemented : public std::logic_error { + public: + cuda_not_implemented(const std::string& what_arg) : logic_error(what_arg) {} +}; + +// this is thrown when cuda returns an error (bad arguments, memory, state, etc) +class cuda_exception : public std::runtime_error { + public: + cuda_exception(const std::string& what_arg) : runtime_error(what_arg) {} +}; +} // namespace dynet + +#ifdef DYNET_SKIP_ARG_CHECK + #define DYNET_INVALID_ARG(msg) + #define DYNET_ARG_CHECK(cond, msg) +#else + #define DYNET_INVALID_ARG(msg) do { \ + std::ostringstream oss; \ + oss << msg; \ + throw std::invalid_argument(oss.str()); \ + } while (0); + + #define DYNET_ARG_CHECK(cond, msg) do { \ + if (!(cond)) { \ + std::ostringstream oss; \ + oss << msg; \ + throw std::invalid_argument(oss.str()); } \ + } while (0); +#endif + +#ifdef DYNET_DO_ASSERT + #define DYNET_ASSERT(expr, msg) do { \ + if(!(expr)) { \ + std::ostringstream oss; \ + oss << msg; \ + throw std::runtime_error(oss.str()); } \ + } while (0); +#else + #define DYNET_ASSERT(expr, msg) +#endif + +#define DYNET_RUNTIME_ERR(msg) do { \ + std::ostringstream oss; \ + oss << msg; \ + throw 
std::runtime_error(oss.str()); } \ + while (0); + +#endif diff --git a/thirdparty/dynet/dynet/exec.cc b/thirdparty/dynet/dynet/exec.cc new file mode 100644 index 000000000..f2d3e57c2 --- /dev/null +++ b/thirdparty/dynet/dynet/exec.cc @@ -0,0 +1,180 @@ +#include "dynet/exec.h" + +#include "dynet/param-nodes.h" +#include "dynet/globals.h" + +using namespace std; + +namespace dynet { + +ExecutionEngine::~ExecutionEngine() {} + +void SimpleExecutionEngine::invalidate() { + num_nodes_evaluated = 0; + backward_computed = 0; +} + +void SimpleExecutionEngine::invalidate(unsigned i) { + num_nodes_evaluated = i; +} + +const Tensor& SimpleExecutionEngine::forward() { + const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); + return forward(node_max_index); +} + +const Tensor& SimpleExecutionEngine::forward(VariableIndex i) { + invalidate(); + return incremental_forward(i); +} + +const Tensor& SimpleExecutionEngine::get_value(VariableIndex i) { + DYNET_ASSERT(i < cg.nodes.size(), "Out-of-bounds variable access in SimpleExecutionEngine::get_value()"); + if (i >= num_nodes_evaluated) { + incremental_forward(); + } + return nfxs[i]; +} + +const Tensor& SimpleExecutionEngine::get_gradient(VariableIndex i) { + DYNET_ASSERT(i < cg.nodes.size(), "Out-of-bounds variable access in SimpleExecutionEngine::get_value()"); + if (i >= backward_computed) { + DYNET_RUNTIME_ERR("Requested gradient for node " << i << ", but backward pass was computed from node " << (backward_computed - 1)); + } + return ndEdfs[i]; +} + +const Tensor& SimpleExecutionEngine::incremental_forward() { + const VariableIndex node_max_index = (VariableIndex)(cg.nodes.size() - 1); + return incremental_forward(node_max_index); +} + +const Tensor& SimpleExecutionEngine::incremental_forward(VariableIndex i) { + DYNET_ASSERT(i < cg.nodes.size(), "Out-of-bounds variable access in SimpleExecutionEngine::incremental_forward()"); + + // free any old memory if this is a new CG + if (num_nodes_evaluated == 0) + 
for(Device* dev : dynet::devices) + dev->pools[(int)DeviceMempool::FXS]->free(); + + if (i >= num_nodes_evaluated) { + nfxs.resize(i + 1); + + //vector dummy(5, "x"); + vector xs(16); + for (; num_nodes_evaluated <= i; ++num_nodes_evaluated) { + const Node* node = cg.nodes[num_nodes_evaluated]; + xs.resize(node->arity()); + unsigned ai = 0; + for (VariableIndex arg : node->args) { + xs[ai] = &nfxs[arg]; + ++ai; + } + nfxs[num_nodes_evaluated].d = node->dim; + // Get the device + DYNET_ASSERT(node->device != nullptr, "Attempt to access null device in SimpleExecutionEngine::incremental_forward"); + nfxs[num_nodes_evaluated].device = node->device; + nfxs[num_nodes_evaluated].mem_pool = DeviceMempool::FXS; + // Get the memory + nfxs[num_nodes_evaluated].v = static_cast(nfxs[num_nodes_evaluated].device->pools[(int)DeviceMempool::FXS]->allocate(node->dim.size() * sizeof(float))); + if (nfxs[num_nodes_evaluated].v == nullptr) + DYNET_RUNTIME_ERR("Ran out of memory when executing node " << num_nodes_evaluated); + void* aux_mem = nullptr; + size_t aux_size = node->aux_storage_size(); + if (aux_size) { + aux_mem = nfxs[num_nodes_evaluated].device->pools[(int)DeviceMempool::FXS]->allocate(aux_size); + if (!aux_mem) + DYNET_RUNTIME_ERR("Ran out of auxiliary memory when executing node " << num_nodes_evaluated); + } + node->aux_mem = aux_mem; + + node->forward(xs, nfxs[num_nodes_evaluated]); + } + } + return nfxs[i]; +} + +void SimpleExecutionEngine::backward(bool full) { + DYNET_ASSERT(nfxs.size() >= cg.nodes.size(), "Mismatched array sizes in SimpleExecutionEngine::backward"); + backward((VariableIndex)(cg.nodes.size()-1),full); +} + +// TODO what is happening with parameter nodes if from_where > param_node_id ? 
+void SimpleExecutionEngine::backward(VariableIndex from_where, bool full) { + if(!(from_where < nfxs.size())) + incremental_forward(from_where); + if (nfxs[from_where].d.size() != 1) + DYNET_INVALID_ARG("backward() can only be called on scalar nodes, but node " << from_where << " has dimension: " << nfxs[from_where].d); + + const unsigned num_nodes = from_where+1; + ndEdfs.resize(num_nodes); + for(Device* device : devices) + device->pools[(int)DeviceMempool::DEDFS]->free(); + for (unsigned i = 0; i < num_nodes; ++i) { + const auto dim = nfxs[i].d; + ndEdfs[i].d = dim; + ndEdfs[i].device = nfxs[i].device; + ndEdfs[i].mem_pool = DeviceMempool::DEDFS; + ndEdfs[i].v = static_cast(ndEdfs[i].device->pools[(int)DeviceMempool::DEDFS]->allocate(dim.size() * sizeof(float))); + if (!ndEdfs[i].v) + DYNET_RUNTIME_ERR("out of memory while attempting to allocate space for derivatives of node " << i); + } + for(Device* device : devices) + device->pools[(int)DeviceMempool::DEDFS]->zero_allocated_memory(); + // initialize dE/dE = 1 + ndEdfs.back().v = kSCALAR_ONE; + + // here we find constant paths to avoid doing extra work + // by default, a node is constant unless + // 1) it is a parameter node + // 2) it depends on a non-constant node + // (thus, functions of constants and inputs end up being + // false in this computation) + vector needs_derivative(num_nodes, full); + if (!full) { + for (auto i : cg.parameter_nodes) + needs_derivative[i] = true; + + for (unsigned ni = 0; ni < num_nodes; ++ni) { + bool nd = needs_derivative[ni]; + for (auto arg : cg.nodes[ni]->args) + nd |= needs_derivative[arg]; + needs_derivative[ni] = nd; + } + } + + // loop in reverse topological order + // consider only nodes that participate in the computation. 
+ vector in_computation(num_nodes, false); + in_computation[num_nodes - 1] = true; + vector xs; + for (int i = num_nodes - 1; i >= 0; --i) { + if (!in_computation[i]) continue; + const Node* node = cg.nodes[i]; + xs.resize(node->arity()); + unsigned ai = 0; + for (VariableIndex arg : node->args) { + in_computation[arg] = true; + xs[ai] = &nfxs[arg]; + ++ai; + } + ai = 0; + for (VariableIndex arg : node->args) { + if (needs_derivative[arg]) { + node->backward(xs, nfxs[i], ndEdfs[i], ai, ndEdfs[arg]); + } + ++ai; + } + } + + // accumulate gradients into parameters + // this is simpler than you might find in some other frameworks + // since we assume parameters come into the graph as a "function" + // that returns the current value of the parameters + for (VariableIndex i : cg.parameter_nodes) + static_cast(cg.nodes[i])->accumulate_grad(ndEdfs[i]); + // We store from_where + 1 because 0 corresponds to "backward wasn't computed" + backward_computed = from_where + 1; +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/exec.h b/thirdparty/dynet/dynet/exec.h new file mode 100644 index 000000000..a8ad1b8aa --- /dev/null +++ b/thirdparty/dynet/dynet/exec.h @@ -0,0 +1,48 @@ +#ifndef DYNET_EXEC_H +#define DYNET_EXEC_H + +#include "dynet/dynet.h" + +namespace dynet { + +class ExecutionEngine { + public: + virtual ~ExecutionEngine(); + virtual void invalidate() = 0; + virtual void invalidate(unsigned) = 0; + virtual const Tensor& forward() = 0; + virtual const Tensor& forward(VariableIndex i) = 0; + virtual const Tensor& incremental_forward() = 0; // if you want to add nodes and evaluate just the new parts + virtual const Tensor& incremental_forward(VariableIndex i) = 0; + virtual const Tensor& get_value(VariableIndex i) = 0; + virtual const Tensor& get_gradient(VariableIndex i) = 0; + virtual void backward(bool full = false) = 0; + virtual void backward(VariableIndex i, bool full = false) = 0; + protected: + explicit ExecutionEngine(const 
ComputationGraph& cg) : cg(cg) {} + const ComputationGraph& cg; + VariableIndex backward_computed; +}; + +class SimpleExecutionEngine : public ExecutionEngine { + public: + explicit SimpleExecutionEngine(const ComputationGraph& cg) : ExecutionEngine(cg) {} + void invalidate() override; + void invalidate(unsigned i) override; + const Tensor& forward() override; + const Tensor& forward(VariableIndex i) override; + const Tensor& incremental_forward() override; // if you want to add nodes and evaluate just the new parts + const Tensor& incremental_forward(VariableIndex i) override; + const Tensor& get_value(VariableIndex i) override; + const Tensor& get_gradient(VariableIndex i) override; + void backward(bool full = false) override; + void backward(VariableIndex i, bool full = false) override; + private: + std::vector nfxs; + std::vector ndEdfs; + VariableIndex num_nodes_evaluated; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/expr.cc b/thirdparty/dynet/dynet/expr.cc new file mode 100644 index 000000000..4f81cc833 --- /dev/null +++ b/thirdparty/dynet/dynet/expr.cc @@ -0,0 +1,191 @@ +#include "dynet/expr.h" + +#include + +#include "dynet/nodes.h" +#include "dynet/nodes-conv.h" + +namespace dynet { +namespace expr { + +using std::vector; + +Expression input(ComputationGraph& g, real s) { return Expression(&g, g.add_input(s)); } +Expression input(ComputationGraph& g, const real *ps) { return Expression(&g, g.add_input(ps)); } +Expression input(ComputationGraph& g, const Dim& d, const vector& data) { return Expression(&g, g.add_input(d, data)); } +Expression input(ComputationGraph& g, const Dim& d, const vector* pdata) { return Expression(&g, g.add_input(d, pdata)); } +Expression input(ComputationGraph& g, const Dim& d, const vector& ids, const vector& data, float defdata) { return Expression(&g, g.add_input(d, ids, data, defdata)); } +Expression const_parameter(ComputationGraph& g, Parameter p) { return Expression(&g, 
g.add_const_parameters(p)); } +Expression const_parameter(ComputationGraph& g, LookupParameter p) { return Expression(&g, g.add_const_parameters(p)); } +Expression parameter(ComputationGraph& g, Parameter p) { return Expression(&g, g.add_parameters(p)); } +Expression parameter(ComputationGraph& g, LookupParameter p) { return Expression(&g, g.add_parameters(p)); } +Expression lookup(ComputationGraph& g, LookupParameter p, unsigned index) { return Expression(&g, g.add_lookup(p, index)); } +Expression lookup(ComputationGraph& g, LookupParameter p, const unsigned* pindex) { return Expression(&g, g.add_lookup(p, pindex)); } +Expression lookup(ComputationGraph& g, LookupParameter p, const vector& indices) { return Expression(&g, g.add_lookup(p, indices)); } +Expression lookup(ComputationGraph& g, LookupParameter p, const vector* pindices) { return Expression(&g, g.add_lookup(p, pindices)); } +Expression const_lookup(ComputationGraph& g, LookupParameter p, unsigned index) { return Expression(&g, g.add_const_lookup(p, index)); } +Expression const_lookup(ComputationGraph& g, LookupParameter p, const unsigned* pindex) { return Expression(&g, g.add_const_lookup(p, pindex)); } +Expression const_lookup(ComputationGraph& g, LookupParameter p, const vector& indices) { return Expression(&g, g.add_const_lookup(p, indices)); } +Expression const_lookup(ComputationGraph& g, LookupParameter p, const vector* pindices) { return Expression(&g, g.add_const_lookup(p, pindices)); } +Expression zeroes(ComputationGraph& g, const Dim& d) { return Expression(&g, g.add_function(d)); } +Expression random_normal(ComputationGraph& g, const Dim& d) { return Expression(&g, g.add_function(d)); } +Expression random_bernoulli(ComputationGraph& g, const Dim& d, real p, real scale) { return Expression(&g, g.add_function({}, d, p, scale)); } +Expression random_uniform(ComputationGraph& g, const Dim& d, real left, real right) { return Expression(&g, g.add_function({}, d, left, right)); } +Expression 
random_gumbel(ComputationGraph& g, const Dim& d, real mu, real beta) { return Expression(&g, g.add_function({}, d, mu, beta)); } + +// identity function, but derivative is not propagated through it +Expression nobackprop(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +// identity function, but derivative is propagated as negative +Expression flip_gradient(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } + +Expression operator-(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression operator+(const Expression& x, const Expression& y) { + if (x.dim().batch_size() == 1) + return Expression(x.pg, x.pg->add_function({y.i, x.i})); + else if (y.dim().batch_size() == 1) + return Expression(x.pg, x.pg->add_function({x.i, y.i})); + else + return Expression(x.pg, x.pg->add_function({x.i, y.i})); +} +Expression operator+(real x, const Expression& y) { return Expression(y.pg, y.pg->add_function({y.i}, x)); } +Expression operator+(const Expression& x, real y) { return y + x; } +Expression operator-(const Expression& x, const Expression& y) { return x + (-y); } +Expression operator-(real x, const Expression& y) { return Expression(y.pg, y.pg->add_function({y.i}, x)); } +Expression operator-(const Expression& x, real y) { return -(y - x); } +Expression operator*(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression operator*(const Expression& x, float y) { return Expression(x.pg, x.pg->add_function({x.i}, y)); } +Expression cmult(const Expression& x, const Expression& y) { + if (x.dim().batch_size() == 1) + return Expression(x.pg, x.pg->add_function({x.i, y.i})); + else if(y.dim().batch_size() == 1) + return Expression(x.pg, x.pg->add_function({y.i, x.i})); + else + return Expression(x.pg, x.pg->add_function({x.i, y.i})); +} +Expression cdiv(const Expression& x, const Expression& y) { + if(y.dim().batch_size()==1) + return 
Expression(x.pg, x.pg->add_function({x.i, y.i})); + else + return Expression(x.pg, x.pg->add_function({x.i, y.i})); +} +Expression colwise_add(const Expression& x, const Expression& bias) { return Expression(x.pg, x.pg->add_function({x.i, bias.i})); } +Expression contract3d_1d_1d(const Expression& x, const Expression& y, const Expression& z) { return Expression(x.pg, x.pg->add_function({x.i, y.i, z.i})); } +Expression contract3d_1d_1d(const Expression& x, const Expression& y, const Expression& z, const Expression& b) { return Expression(x.pg, x.pg->add_function({x.i, y.i, z.i, b.i})); } +Expression contract3d_1d(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression contract3d_1d(const Expression& x, const Expression& y, const Expression& b) { return Expression(x.pg, x.pg->add_function({x.i, y.i, b.i})); } + +Expression sqrt(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression abs(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression erf(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression tanh(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression lgamma(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression log(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression exp(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression square(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression cube(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression logistic(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression rectify(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression hinge(const Expression& x, unsigned index, float m) { return 
Expression(x.pg, x.pg->add_function({x.i}, index, m)); } +Expression hinge(const Expression& x, const unsigned* pindex, float m) { return Expression(x.pg, x.pg->add_function({x.i}, pindex, m)); } +Expression hinge(const Expression& x, const std::vector & indices, float m) { return Expression(x.pg, x.pg->add_function({x.i}, indices, m)); } +Expression hinge(const Expression& x, const std::vector * pindices, float m) { return Expression(x.pg, x.pg->add_function({x.i}, pindices, m)); } +Expression log_softmax(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression log_softmax(const Expression& x, const vector& d) { return Expression(x.pg, x.pg->add_function({x.i}, d)); } +Expression sparsemax(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression sparsemax_loss(const Expression& x, const vector& target_support) { return Expression(x.pg, x.pg->add_function({x.i}, target_support)); } +Expression sparsemax_loss(const Expression& x, const vector* ptarget_support) { return Expression(x.pg, x.pg->add_function({x.i}, ptarget_support)); } +Expression softmax(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression softsign(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression pow(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression min(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression max(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression noise(const Expression& x, real stddev) { return Expression(x.pg, x.pg->add_function({x.i}, stddev)); } +Expression dropout(const Expression& x, real p) { return Expression(x.pg, x.pg->add_function({x.i}, p)); } +Expression dropout_batch(const Expression& x, real p) { return Expression(x.pg, x.pg->add_function({x.i}, p)); 
} +Expression dropout_dim(const Expression& x, unsigned d, real p) { return Expression(x.pg, x.pg->add_function({x.i}, d, p)); } +Expression block_dropout(const Expression& x, real p) { return Expression(x.pg, x.pg->add_function({x.i}, p)); } + +Expression reshape(const Expression& x, const Dim& d) { return Expression(x.pg, x.pg->add_function({x.i}, d)); } +Expression transpose(const Expression& x, const vector& dims) { return Expression(x.pg, x.pg->add_function({x.i}, dims)); } +Expression select_rows(const Expression& x, const vector& rows) { return Expression(x.pg, x.pg->add_function({x.i}, rows)); } +Expression select_rows(const Expression& x, const vector* prows) { return Expression(x.pg, x.pg->add_function({x.i}, prows)); } +Expression select_cols(const Expression& x, const vector& cols) { return Expression(x.pg, x.pg->add_function({x.i}, cols)); } +Expression select_cols(const Expression& x, const vector* pcols) { return Expression(x.pg, x.pg->add_function({x.i}, pcols)); } +Expression inverse(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression logdet(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } + +Expression trace_of_product(const Expression& x, const Expression& y) {return Expression(x.pg, x.pg->add_function({x.i, y.i}));} + +Expression squared_norm(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } + +Expression dot_product(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression squared_distance(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression huber_distance(const Expression& x, const Expression& y, real c) { return Expression(x.pg, x.pg->add_function({x.i, y.i}, c)); } +Expression l1_distance(const Expression& x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression binary_log_loss(const Expression& 
x, const Expression& y) { return Expression(x.pg, x.pg->add_function({x.i, y.i})); } +Expression pairwise_rank_loss(const Expression& x, const Expression& y, real m) { return Expression(x.pg, x.pg->add_function({x.i, y.i}, m)); } +Expression poisson_loss(const Expression& x, unsigned y) { return Expression(x.pg, x.pg->add_function({x.i}, y)); } +Expression poisson_loss(const Expression& x, const unsigned* py) { return Expression(x.pg, x.pg->add_function({x.i}, py)); } + +//Expression conv1d_narrow(const Expression& x, const Expression& f) { return Expression(x.pg, x.pg->add_function({x.i, f.i})); } +//Expression conv1d_wide(const Expression& x, const Expression& f) { return Expression(x.pg, x.pg->add_function({x.i, f.i})); } +Expression filter1d_narrow(const Expression& x, const Expression& f) { return Expression(x.pg, x.pg->add_function({x.i, f.i})); } +Expression kmax_pooling(const Expression& x, unsigned k, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, k, d)); } +Expression fold_rows(const Expression& x, unsigned nrows) { return Expression(x.pg, x.pg->add_function({x.i}, nrows)); } +Expression conv2d(const Expression& x, const Expression& f, const std::vector& stride, bool is_valid) { return Expression(x.pg, x.pg->add_function({x.i, f.i}, stride, is_valid)); } +Expression conv2d(const Expression& x, const Expression& f, const Expression& b, const std::vector& stride, bool is_valid) { + return Expression(x.pg, x.pg->add_function({x.i, f.i, b.i}, stride, is_valid)); +} + +Expression pick(const Expression& x, unsigned v, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, v, d)); } +Expression pick(const Expression& x, const vector & v, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, v, d)); } +Expression pick(const Expression& x, const unsigned* pv, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, pv, d)); } +Expression pick(const Expression& x, const vector * pv, unsigned d) { return Expression(x.pg, 
x.pg->add_function({x.i}, pv, d)); } + +Expression pick_batch_elem(const Expression& x, unsigned v) { return Expression(x.pg, x.pg->add_function({x.i}, v)); } +Expression pick_batch_elems(const Expression& x, const std::vector& v) { return Expression(x.pg, x.pg->add_function({x.i}, v)); } +Expression pick_batch_elem(const Expression& x, const unsigned* pv) { return Expression(x.pg, x.pg->add_function({x.i}, pv)); } +Expression pick_batch_elems(const Expression& x, const vector * pv) { return Expression(x.pg, x.pg->add_function({x.i}, pv)); } + +Expression pick_range(const Expression& x, unsigned v, unsigned u, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, v, u, d)); } +Expression pickrange(const Expression& x, unsigned v, unsigned u) { + std::cerr << "WARNING: The function naming pickrange() has been deprecated. Please use pick_range() instead." << std::endl; + return Expression(x.pg, x.pg->add_function({x.i}, v, u, 0)); +} + +Expression pickneglogsoftmax(const Expression& x, unsigned v) { return Expression(x.pg, x.pg->add_function({x.i}, v)); } +Expression pickneglogsoftmax(const Expression& x, const vector & v) { return Expression(x.pg, x.pg->add_function({x.i}, v)); } +Expression pickneglogsoftmax(const Expression& x, const unsigned* pv) { return Expression(x.pg, x.pg->add_function({x.i}, pv)); } +Expression pickneglogsoftmax(const Expression& x, const vector * pv) { return Expression(x.pg, x.pg->add_function({x.i}, pv)); } + +Expression average_cols(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression sum_dim(const Expression& x, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, d)); } +Expression sum_rows(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i}, 0)); } +Expression sum_cols(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i}, 1)); } +Expression sum_elems(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression 
mean_elems(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i}, 1)); } +Expression moment_elems(const Expression& x, unsigned r) { return Expression(x.pg, x.pg->add_function({x.i}, r)); } +Expression std_elems(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } + +Expression sum_batches(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } +Expression moment_batches(const Expression& x, unsigned r) { return Expression(x.pg, x.pg->add_function({x.i}, r)); } +Expression mean_batches(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i}, 1)); } +Expression std_batches(const Expression& x) { return Expression(x.pg, x.pg->add_function({x.i})); } + +Expression mean_dim(const Expression& x, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, d, 1)); } +Expression moment_dim(const Expression& x, unsigned d, unsigned r) { return Expression(x.pg, x.pg->add_function({x.i}, d, r)); } +Expression std_dim(const Expression& x, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, d)); } + +Expression kmh_ngram(const Expression& x, unsigned n) { return Expression(x.pg, x.pg->add_function({x.i}, n)); } + +Expression max_dim(const Expression& x, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, d)); } +Expression min_dim(const Expression& x, unsigned d) { return Expression(x.pg, x.pg->add_function({x.i}, d)); } + +Expression layer_norm(const Expression& x, const Expression& g, const Expression& b){ + Expression mu = mean_elems(x); + Expression x_centered= x - mu; + Expression sigma = std_elems(x); + return cmult(g, cdiv(x_centered,sigma + 1e-8)) + b; +} +} +} diff --git a/thirdparty/dynet/dynet/expr.h b/thirdparty/dynet/dynet/expr.h new file mode 100644 index 000000000..fec5806f4 --- /dev/null +++ b/thirdparty/dynet/dynet/expr.h @@ -0,0 +1,2006 @@ +/** + * \file expr.h + * \defgroup operations operations + * \defgroup inputoperations inputoperations + * \defgroup 
arithmeticoperations arithmeticoperations + * \defgroup lossoperations lossoperations + * \defgroup flowoperations flowoperations + * \defgroup noiseoperations noiseoperations + * \defgroup convolutionoperations convolutionoperations + * \defgroup tensoroperations tensoroperations + * \defgroup linalgoperations linalgoperations + * \defgroup normoperations normoperations + * \brief The various operations that you can use in building a DyNet graph + * + * \details TODO: **This documentation is incomplete. See expr.h for a full list of expressions.** + */ + +#ifndef DYNET_EXPR_H +#define DYNET_EXPR_H + +#include "dynet/dynet.h" +#include "dynet/nodes.h" +#include "dynet/nodes-contract.h" +#include + + +namespace dynet { +namespace expr { +/** + * \ingroup operations + * \brief Expressions are the building block of a Dynet computation graph + * \details [long description] + */ +struct Expression { + ComputationGraph *pg; + VariableIndex i; + unsigned graph_id; + + Expression() : pg(nullptr), i(0), graph_id(0) { } + const bool is_stale() const {return (get_number_of_active_graphs() != 1 || graph_id != get_current_graph_id());} + /** + * \brief Base expression constructor + * \details Used when creating operations + * + * \param pg Pointer to the computation graph + * \param i Variable index + * \param name Name of the expression + */ + Expression(ComputationGraph *pg, VariableIndex i) : pg(pg), i(i), graph_id(pg->get_id()) { } + /** + * \brief Get value of the expression + * \details Throws a std::runtime_error exception if the expression is stale (see is_stale()) + * \return Value of the expression as a tensor + */ + const Tensor& value() const { + if (this->is_stale()) { + throw std::runtime_error("Attempt to use a stale expression."); + } + return pg->get_value(i); + } + /** + * \brief Get gradient of the expression + * \details Throws a std::runtime_error exception if the expression is stale (see is_stale()) + * + * Make sure to call `backward` on a downstream expression before calling
this. + * + * If the expression is a constant expression (meaning it's not a function of a parameter), dynet won't compute its gradient for the sake of efficiency. You need to manually force the gradient computation by adding the argument `full=true` to `backward`. + + * \return Gradient of the expression as a tensor + */ + const Tensor& gradient() const { + if (this->is_stale()) { + throw std::runtime_error("Attempt to use a stale expression."); + } + return pg->get_gradient(i); + } + /** + * \brief Get dimension of the expression + * \details Throws a std::runtime_error exception if the expression is stale (see is_stale()) + * \return Dimension of the expression + */ + const Dim& dim() const { + if (this->is_stale()) { + throw std::runtime_error("Attempt to use a stale expression."); + } + return pg->get_dimension(i); + } +}; + +namespace detail { +template +Expression f(const T& xs) { + ComputationGraph *pg = xs.begin()->pg; + std::vector xis(xs.size()); + int i = 0; + for (auto xi = xs.begin(); xi != xs.end(); ++xi) xis[i++] = xi->i; + return Expression(pg, pg->add_function(xis)); +} +template +Expression f(const T& xs, const T1& arg1) { + ComputationGraph *pg = xs.begin()->pg; + std::vector xis(xs.size()); + int i = 0; + for (auto xi = xs.begin(); xi != xs.end(); ++xi) xis[i++] = xi->i; + return Expression(pg, pg->add_function(xis, arg1)); +} +} + +//////////////////////////////////////////////// +// Input operations // +//////////////////////////////////////////////// + +/** + * \ingroup inputoperations + * \brief Scalar input + * \details Create an expression that represents the scalar value s + * + * \param g Computation graph + * \param s Real number + * + * \return An expression representing s + */ +Expression input(ComputationGraph& g, real s); + +/** + * \ingroup inputoperations + * \brief Modifiable scalar input + * \details Create an expression that represents the scalar value *ps.
+ * If *ps is changed and the computation graph recalculated, the + * next forward pass will reflect the new value. + * + * \param g Computation graph + * \param ps Real number pointer + * + * \return An expression representing *ps + */ +Expression input(ComputationGraph& g, const real *ps); + +/** + * \ingroup inputoperations + * \brief Vector/matrix/tensor input + * \details Create an expression that represents a vector, matrix, or tensor + * input. The dimensions of the input are defined by ``d``. So for example + * > ``input(g,{50},data)``: will result in a 50-length vector + * > ``input(g,{50,30},data)``: will result in a 50x30 matrix + * and so on, for an arbitrary number of dimensions. + * This function can also be used to import minibatched inputs. For example, + * if we have 10 examples in a minibatch, each with size 50x30, then we call + * > ``input(g,Dim({50,30},10),data)`` + * The data vector "data" will contain the values used to fill the input, in + * column-major format. The length must add to the product of all dimensions in + * d. + * + * \param g Computation graph + * \param d Dimension of the input matrix + * \param data A vector of data points + * + * \return An expression representing data + */ +Expression input(ComputationGraph& g, const Dim& d, const std::vector& data); + +/** + * \ingroup inputoperations + * \brief Updatable vector/matrix/tensor input + * \details Similarly to input that takes a vector reference, input a vector, matrix, + * or tensor input. Because we pass the pointer, the data can be updated. + * + * \param g Computation graph + * \param d Dimension of the input matrix + * \param pdata A pointer to an (updatable) vector of data points + * + * \return An expression representing *pdata + */ +Expression input(ComputationGraph& g, const Dim& d, const std::vector* pdata); + +/** + * \ingroup inputoperations + * \brief Sparse vector input + * \details This operation takes input as a sparse matrix of index/value pairs. 
It is + * exactly the same as the standard input via vector reference, but sets all + * non-specified values to "defdata" and resets all others to the appropriate + * input values. + * + * \param g Computation graph + * \param d Dimension of the input matrix + * \param ids The indexes of the data points to update + * \param data The data points corresponding to each index + * \param defdata The default data with which to set the unspecified data points + * + * \return An expression representing data + */ +Expression input(ComputationGraph& g, const Dim& d, const std::vector& ids, const std::vector& data, float defdata = 0.f); + +/** + * \ingroup inputoperations + * \brief Load parameter + * \details Load parameters into the computation graph. + * + * \param g Computation graph + * \param p Parameter object to load + * + * \return An expression representing p + */ +Expression parameter(ComputationGraph& g, Parameter p); + +/** + * \ingroup inputoperations + * \brief Load lookup parameter + * \details Load a full tensor of lookup parameters into the computation graph. + * Normally lookup parameters are accessed by using the lookup() function + * to grab a single element. However, in some cases we'll want to access + * all of the parameters in the entire set of lookup parameters for some + * reason. In this case you can use this function. In this case, the + * first dimensions in the returned tensor will be equivalent to the + * dimensions that we would get by calling the lookup() function, + * and the size of the final dimension will be equal to the size of the + * vocabulary.
+ * + * \param g Computation graph + * \param lp LookupParameter object to load + * + * \return An expression representing lp + */ +Expression parameter(ComputationGraph& g, LookupParameter lp); + +/** + * \ingroup inputoperations + * \brief Load constant parameters + * \details Load parameters into the computation graph, but prevent them from being + * updated when performing parameter update. + * + * \param g Computation graph + * \param p Parameter object to load + * + * \return An expression representing the constant p + */ +Expression const_parameter(ComputationGraph& g, Parameter p); + +/** + * \ingroup inputoperations + * \brief Load constant lookup parameters + * \details Load lookup parameters into the computation graph, but prevent them from being + * updated when performing parameter update. + * + * \param g Computation graph + * \param lp LookupParameter object to load + * + * \return An expression representing the constant lp + */ +Expression const_parameter(ComputationGraph& g, LookupParameter lp); + +/** + * \ingroup inputoperations + * \brief Look up parameter + * \details Look up parameters according to an index, and load them into the + * computation graph. + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param index Index of the parameters within p + * + * \return An expression representing p[index] + */ +Expression lookup(ComputationGraph& g, LookupParameter p, unsigned index); + +/** + * \ingroup inputoperations + * \brief Look up parameters with modifiable index + * \details Look up parameters according to the *pindex, and load them into the + * computation graph. When *pindex changes, on the next computation of + * forward() the values will change. 
+ * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param pindex Pointer index of the parameters within p + * + * \return An expression representing p[*pindex] + */ +Expression lookup(ComputationGraph& g, LookupParameter p, const unsigned* pindex); + +/** + * \ingroup inputoperations + * \brief Look up parameter + * \details Look up parameters according to an index, and load them into the + * computation graph. Do not perform gradient update on the parameters. + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param index Index of the parameters within p + * + * \return A constant expression representing p[index] + */ +Expression const_lookup(ComputationGraph& g, LookupParameter p, unsigned index); + +/** + * \ingroup inputoperations + * \brief Constant lookup parameters with modifiable index + * \details Look up parameters according to the *pindex, and load them into the + * computation graph. When *pindex changes, on the next computation of + * forward() the values will change. However, gradient updates will not be + * performed. + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param pindex Pointer index of the parameters within p + * + * \return A constant expression representing p[*pindex] + */ +Expression const_lookup(ComputationGraph& g, LookupParameter p, const unsigned* pindex); + +// Batched versions of lookup and const_lookup + +/** + * \ingroup inputoperations + * \brief Look up parameters + * \details The mini-batched version of lookup.
The resulting expression will be + * a mini-batch of parameters, where the "i"th element of the batch corresponds + * to the parameters at the position specified by the "i"th element of + * "indices" + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param indices Index of the parameters at each position in the batch + * + * \return An expression with the "i"th batch element representing p[indices[i]] + */ +Expression lookup(ComputationGraph& g, LookupParameter p, const std::vector& indices); + +/** + * \ingroup inputoperations + * \brief Look up parameters + * \details The mini-batched version of lookup with modifiable parameter indices. + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param pindices Pointer to lookup indices + * + * \return An expression with the "i"th batch element representing p[*pindices[i]] + */ +Expression lookup(ComputationGraph& g, LookupParameter p, const std::vector* pindices); + +/** + * \ingroup inputoperations + * \brief Look up parameters + * \details Mini-batched lookup that will not update the parameters. + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param indices Lookup indices + * + * \return A constant expression with the "i"th batch element representing p[indices[i]] + */ +Expression const_lookup(ComputationGraph& g, LookupParameter p, const std::vector& indices); + +/** + * \ingroup inputoperations + * \brief Look up parameters + * \details Mini-batched lookup that will not update the parameters, with modifiable + * indices. + * + * \param g Computation graph + * \param p LookupParameter object from which to load + * \param pindices Lookup index pointers. 
+ * + * \return A constant expression with the "i"th batch element representing + * p[*pindices[i]] + */ +Expression const_lookup(ComputationGraph& g, LookupParameter p, const std::vector* pindices); + +/** + * \ingroup inputoperations + * \brief Create an input full of zeros + * \details Create an input full of zeros, sized according to dimensions d. + * + * \param g Computation graph + * \param d The dimensions of the input + * + * \return A "d" dimensioned zero vector + */ +Expression zeroes(ComputationGraph& g, const Dim& d); + +/** + * \ingroup inputoperations + * \brief Create a random normal vector + * \details Create a vector distributed according to normal distribution with mean + * 0, variance 1. + * + * \param g Computation graph + * \param d The dimensions of the input + * + * \return A "d" dimensioned normally distributed vector + */ +Expression random_normal(ComputationGraph& g, const Dim& d); + +/** + * \ingroup inputoperations + * \brief Create a random bernoulli vector + * \details Create a vector distributed according to bernoulli distribution with parameter p. + * + * \param g Computation graph + * \param d The dimensions of the input + * \param p The bernoulli p parameter + * \param scale A scaling factor for the output ("active" elements will receive this value) + * + * \return A "d" dimensioned bernoulli distributed vector + */ +Expression random_bernoulli(ComputationGraph& g, const Dim& d, real p, real scale = 1.0f); + +/** + * \ingroup inputoperations + * \brief Create a random uniform vector + * \details Create a vector distributed according to uniform distribution with boundaries left and right. 
+ * + * \param g Computation graph + * \param d The dimensions of the input + * \param left The left boundary + * \param right The right boundary + * + * \return A "d" dimensioned uniform distributed vector + */ +Expression random_uniform(ComputationGraph& g, const Dim& d, real left, real right); + +/** + * \ingroup inputoperations + * \brief Create a random Gumbel sampled vector + * \details Create a vector distributed according to a Gumbel distribution with the specified parameters. (Currently only the defaults of mu=0.0 and beta=1.0 are supported.) + * + * \param g Computation graph + * \param d The dimensions of the input + * \param mu The mu parameter + * \param beta The beta parameter + * + * \return A "d" dimensioned Gumbel distributed vector + */ +Expression random_gumbel(ComputationGraph& g, const Dim& d, real mu = 0.0, real beta = 1.0); + +//////////////////////////////////////////////// +// Arithmetic operations // +//////////////////////////////////////////////// + +/** + * \ingroup arithmeticoperations + * \brief Negation + * \details Negate the passed argument. + * + * \param x An input expression + * + * \return The negation of x + */ +Expression operator-(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Expression addition + * \details Add two expressions of the same dimensions.
+ * + * \param x The first input + * \param y The second input + * + * \return The sum of x and y + */ +Expression operator+(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Scalar addition + * \details Add a scalar to an expression + * + * \param x The expression + * \param y The scalar + * + * \return An expression equal to x, with every component increased by y + */ +Expression operator+(const Expression& x, real y); + +/** + * \ingroup arithmeticoperations + * \brief Scalar addition + * \details Add a scalar to an expression + * + * \param x The scalar + * \param y The expression + * + * \return An expression equal to y, with every component increased by x + */ +Expression operator+(real x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Expression subtraction + * \details Subtract one expression from another. + * + * \param x The expression from which to subtract + * \param y The expression to subtract + * + * \return An expression where the ith element is x_i minus y_i + */ +Expression operator-(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Scalar subtraction + * \details Subtract an expression from a scalar + * + * \param x The scalar from which to subtract + * \param y The expression to subtract + * + * \return An expression where the ith element is x minus y_i + */ +Expression operator-(real x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Scalar subtraction + * \details Subtract a scalar from an expression + * + * \param x The expression from which to subtract + * \param y The scalar to subtract + * + * \return An expression where the ith element is x_i minus y + */ +Expression operator-(const Expression& x, real y); + + +/** + * \ingroup arithmeticoperations + * \brief Matrix multiplication + * \details Multiply two matrices together.
Like standard matrix multiplication, the + * second dimension of x and the first dimension of y must match. + * + * \param x The left-hand matrix + * \param y The right-hand matrix + * + * \return An expression x times y + */ +Expression operator*(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Matrix-scalar multiplication + * \details Multiply an expression component-wise by a scalar. + * + * \param x The matrix + * \param y The scalar + * + * \return An expression where the ith element is x_i times y + */ +Expression operator*(const Expression& x, float y); + +/** + * \ingroup arithmeticoperations + * \brief Matrix-scalar multiplication + * \details Multiply an expression component-wise by a scalar. + * + * \param y The scalar + * \param x The matrix + * + * \return An expression where the ith element is x_i times y + */ +inline Expression operator*(float y, const Expression& x) { return x * y; } + +/** + * \ingroup arithmeticoperations + * \brief Matrix-scalar division + * \details Divide an expression component-wise by a scalar. + * + * \param x The matrix + * \param y The scalar + * + * \return An expression where the ith element is x_i divided by y + */ +inline Expression operator/(const Expression& x, float y) { return x * (1.f / y); } + +/** + * \ingroup arithmeticoperations + * \brief Affine transform + * \details This performs an affine transform over an arbitrary (odd) number of expressions + * held in the input initializer list xs. + * The first expression is the "bias," which is added to the expression as-is. + * The remaining expressions are multiplied together in pairs, then added. + * A very common usage case is the calculation of the score for a neural network + * layer (e.g. b + Wz) where b is the bias, W is the weight matrix, and z is the + * input. In this case xs[0] = b, xs[1] = W, and xs[2] = z. 
+ * + * \param xs An initializer list containing an odd number of expressions + * + * \return An expression equal to: xs[0] + xs[1]*xs[2] + xs[3]*xs[4] + ... + */ +inline Expression affine_transform(const std::initializer_list& xs) { return detail::f(xs); } +template +inline Expression affine_transform(const T& xs) { return detail::f(xs); } + +/** + * \ingroup arithmeticoperations + * \brief Sum + * \details This performs an elementwise sum over all the expressions in xs + * + * \param xs An initializer list containing expressions + * + * \return An expression where the ith element is equal to xs[0][i] + xs[1][i] + ... + */ +inline Expression sum(const std::initializer_list& xs) { return detail::f(xs); } +template +inline Expression sum(const T& xs) { return detail::f(xs); } + +/** + * \ingroup arithmeticoperations + * \brief Sum all elements + * \details Sum all the elements in an expression. + * + * \param x The input expression + * + * \return The sum of all of its elements + */ +Expression sum_elems(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Compute moment over all elements + * \details Compute the moment of order \f$r\f$, \f$\frac 1 n\sum_{i=1}^nx_i^r\f$ over all the elements in each batch of the expression + * + * \param x The input mini-batched expression + * \param r Order of the moment + * + * \return A scalar expression (with a potential batch dimension) + */ +Expression moment_elems(const Expression& x, unsigned r); + +/** + * \ingroup arithmeticoperations + * \brief Compute mean over all elements + * \details Computes \f$\frac 1 n\sum_{i=1}^nx_i\f$ over all the elements in each batch of the expression + * + * \param x The input mini-batched expression + * + * \return A scalar expression (with a potential batch dimension) + */ +Expression mean_elems(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Compute Standard deviation over all elements + * \details Computes \f$\frac 1 n\sum_{i=1}^n(x_i 
-\mu)^2\f$ where \f$\mu=\frac 1 n\sum_{i=1}^nx_i\f$ over all the elements in each batch of the expression + * + * \param x The input mini-batched expression + * + * \return A scalar expression (with a potential batch dimension) + */ +Expression std_elems(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Average + * \details This performs an elementwise average over all the expressions in xs + * + * \param xs An initializer list containing expressions + * + * \return An expression where the ith element is equal to (xs[0][i] + xs[1][i] + ...)/|xs| + */ +inline Expression average(const std::initializer_list& xs) { return detail::f(xs); } +template +inline Expression average(const T& xs) { return detail::f(xs); } + +/** + * \ingroup arithmeticoperations + * \brief Square root + * \details Elementwise square root. + * + * \param x The input expression + * + * \return An expression where the ith element is equal to \f$\sqrt(x_i)\f$ + */ +Expression sqrt(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Absolute value + * \details Elementwise absolute value. 
+ * + * \param x The input expression + * + * \return An expression where the ith element is equal to \f$\vert x_i\vert\f$ + */ +Expression abs(const Expression& x); + + +/** + * \ingroup arithmeticoperations + * \brief Gaussian error function + * \details Elementwise calculation of the Gaussian error function + * + * \param x The input expression + * + * \return An expression where the ith element is equal to erf(x_i) + */ +Expression erf(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Hyperbolic tangent + * \details Elementwise calculation of the hyperbolic tangent + * + * \param x The input expression + * + * \return An expression where the ith element is equal to tanh(x_i) + */ +Expression tanh(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Natural exponent + * \details Calculate elementwise y_i = e^{x_i} + * + * \param x The input expression + * + * \return An expression where the ith element is equal to e^{x_i} + */ +Expression exp(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Square + * \details Calculate elementwise y_i = x_i^2 + * + * \param x The input expression + * + * \return An expression where the ith element is equal to x_i^2 + */ +Expression square(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Cube + * \details Calculate elementwise y_i = x_i^3 + * + * \param x The input expression + * + * \return An expression where the ith element is equal to x_i^3 + */ +Expression cube(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Log gamma + * \details Calculate elementwise y_i = ln(gamma(x_i)) + * + * \param x The input expression + * + * \return An expression where the ith element is equal to ln(gamma(x_i)) + */ +Expression lgamma(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Logarithm + * \details Calculate the elementwise natural logarithm y_i = ln(x_i) + * + * \param x The input expression + * 
+ * \return An expression where the ith element is equal to ln(x_i) + */ +Expression log(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Logistic sigmoid function + * \details Calculate elementwise y_i = 1/(1+e^{-x_i}) + * + * \param x The input expression + * + * \return An expression where the ith element is equal to y_i = 1/(1+e^{-x_i}) + */ +Expression logistic(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Rectifier + * \details Calculate elementwise the recitifer (ReLU) function y_i = max(x_i,0) + * + * \param x The input expression + * + * \return An expression where the ith element is equal to max(x_i,0) + */ +Expression rectify(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Soft Sign + * \details Calculate elementwise the softsign function y_i = x_i/(1+|x_i|) + * + * \param x The input expression + * + * \return An expression where the ith element is equal to x_i/(1+|x_i|) + */ +Expression softsign(const Expression& x); + +/** + * \ingroup arithmeticoperations + * \brief Power function + * \details Calculate an output where the ith element is equal to x_i^y_i + * + * \param x The input expression + * \param y The exponent expression + * + * \return An expression where the ith element is equal to x_i^y_i + */ +Expression pow(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Minimum + * \details Calculate an output where the ith element is min(x_i,y_i) + * + * \param x The first input expression + * \param y The second input expression + * + * \return An expression where the ith element is equal to min(x_i,y_i) + */ +Expression min(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Maximum + * \details Calculate an output where the ith element is max(x_i,y_i) + * + * \param x The first input expression + * \param y The second input expression + * + * \return An expression where the ith 
element is equal to max(x_i,y_i) + */ +Expression max(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Max + * \details This performs an elementwise max over all the expressions in xs + * + * \param xs An initializer list containing expressions + * + * \return An expression where the ith element is equal to max(xs[0][i], xs[1][i], ...) + */ +inline Expression max(const std::initializer_list& xs) { return detail::f(xs); } +template +inline Expression max(const T& xs) { return detail::f(xs); } + +/** + * \ingroup arithmeticoperations + * \brief Dot Product + * \details Calculate the dot product sum_i x_i*y_i + * + * \param x The input expression + * \param y The input expression + * + * \return An expression equal to the dot product + */ +Expression dot_product(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Componentwise multiply + * \details Do a componentwise multiply where each value is equal to x_i*y_i. + * This function used to be called cwise_multiply. 
+ * + * \param x The first input expression + * \param y The second input expression + * + * \return An expression where the ith element is equal to x_i*y_i + */ +Expression cmult(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Componentwise division + * \details Do a componentwise division where each value is equal to x_i/y_i + * + * \param x The first input expression + * \param y The second input expression + * + * \return An expression where the ith element is equal to x_i/y_i + */ +Expression cdiv(const Expression& x, const Expression& y); + +/** + * \ingroup arithmeticoperations + * \brief Columnwise addition + * \details Add vector "bias" to each column of matrix "x" + * + * \param x An MxN matrix + * \param bias A length M vector + * + * \return An expression where bias is added to each column of x + */ +Expression colwise_add(const Expression& x, const Expression& bias); + +//////////////////////////////////////////////// +// Probability/loss operations // +//////////////////////////////////////////////// + +/** + * \ingroup lossoperations + * \brief Softmax + * \details The softmax function normalizes each column to ensure that all + * values are between 0 and 1 and add to one by applying the + * e^{x[i]}/{sum_j e^{x[j]}}. 
+ * + * \param x A vector or matrix + * + * \return A vector or matrix after calculating the softmax + */ +Expression softmax(const Expression& x); + +/** + * \ingroup lossoperations + * \brief Log softmax + * \details The log softmax function normalizes each column to ensure that all + * values are between 0 and 1 and add to one by applying the + * e^{x[i]}/{sum_j e^{x[j]}}, then takes the log + * + * \param x A vector or matrix + * + * \return A vector or matrix after calculating the log softmax + */ +Expression log_softmax(const Expression& x); + +/** + * \ingroup lossoperations + * \brief Restricted log softmax + * \details The log softmax function calculated over only a subset of the vector elements. The + * elements to be included are set by the ``restriction`` variable. All elements not + * included in ``restriction`` are set to negative infinity. + * + * \param x A vector over which to calculate the softmax + * \param restriction The elements over which to calculate the softmax + * + * \return A vector with the log softmax over the specified elements + */ +Expression log_softmax(const Expression& x, const std::vector& restriction); + +/** + * \ingroup lossoperations + * \brief Log, sum, exp + * \details The elementwise "logsumexp" function that calculates + * \f$ln(\sum_i e^{xs_i})\f$, used in adding probabilities in the log domain. + * + * \param xs Expressions with respect to which to calculate the logsumexp. + * + * \return The result. + */ +inline Expression logsumexp(const std::initializer_list& xs) { return detail::f(xs); } +template +inline Expression logsumexp(const T& xs) { return detail::f(xs); } + +/** + * \ingroup lossoperations + * \brief Negative softmax log likelihood + * \details This function takes in a vector of scores ``x``, and performs a log softmax, takes + * the negative, and selects the likelihood corresponding to the element ``v``. 
This is + * perhaps the most standard loss function for training neural networks to predict + * one out of a set of elements. + * + * \param x A vector of scores + * \param v The element with which to calculate the loss + * + * \return The negative log likelihood of element ``v`` after taking the softmax + */ +Expression pickneglogsoftmax(const Expression& x, unsigned v); + +/** + * \ingroup lossoperations + * \brief Modifiable negative softmax log likelihood + * \details This function calculates the negative log likelihood after the softmax with + * respect to index ``*pv``. This computes the same value as the previous function + * that passes the index ``v`` by value, but instead passes by pointer so the value + * ``*pv`` can be modified without re-constructing the computation graph. This can be + * used in situations where we want to create a computation graph once, then feed it + * different data points. + * + * \param x A vector of scores + * \param pv A pointer to the index of the correct element + * + * \return The negative log likelihood of element ``*pv`` after taking the softmax + */ +Expression pickneglogsoftmax(const Expression& x, const unsigned * pv); + +/** + * \ingroup lossoperations + * \brief Batched negative softmax log likelihood + * \details This function is similar to standard pickneglogsoftmax, but calculates loss with + * respect to multiple batch elements. The input will be a mini-batch of score vectors + * where the number of batch elements is equal to the number of indices in ``v``. 
+ * + * \param x An expression with vectors of scores over N batch elements + * \param v A size-N vector indicating the index with respect to all the batch elements + * + * \return The negative log likelihoods over all the batch elements + */ +Expression pickneglogsoftmax(const Expression& x, const std::vector & v); + +/** + * \ingroup lossoperations + * \brief Modifiable batched negative softmax log likelihood + * \details This function is a combination of modifiable pickneglogsoftmax and batched + * pickneglogsoftmax: ``pv`` can be modified without re-creating the computation graph. + * + * \param x An expression with vectors of scores over N batch elements + * \param pv A pointer to the indexes + * + * \return The negative log likelihoods over all the batch elements + */ +Expression pickneglogsoftmax(const Expression& x, const std::vector * pv); + +/** + * \ingroup lossoperations + * \brief Hinge loss + * \details This expression calculates the hinge loss, formally expressed as: + * \f$ \text{hinge}(x,index,m) = \sum_{i \ne index} \max(0, m-x[index]+x[i]). \f$ + * + * \param x A vector of scores + * \param index The index of the correct candidate + * \param m The margin + * + * \return The hinge loss of candidate ``index`` with respect to margin ``m`` + */ +Expression hinge(const Expression& x, unsigned index, float m = 1.0); + +/** + * \ingroup lossoperations + * \brief Modifiable hinge loss + * \details This function calculates the hinge loss + * with respect to index ``*pindex``. This computes the same value as the previous function + * that passes the index ``index`` by value, but instead passes by pointer so the value + * ``*pindex`` can be modified without re-constructing the computation graph. This can be + * used in situations where we want to create a computation graph once, then feed it + * different data points. 
+ * + * \param x A vector of scores + * \param pindex A pointer to the index of the correct candidate + * \param m The margin + * + * \return The hinge loss of candidate ``*pindex`` with respect to margin ``m`` + */ +Expression hinge(const Expression& x, const unsigned* pindex, float m = 1.0); + +/** + * \ingroup lossoperations + * \brief Batched hinge loss + * \details The same as hinge loss, but for the case where ``x`` is a mini-batched tensor + * with ``indices.size()`` batch elements, and ``indices`` is a vector indicating + * the index of the correct element for each of these batch elements. + * + * \param x A mini-batch of vectors with ``indices.size()`` batch elements + * \param indices The indices of the correct candidates for each batch element + * \param m The margin + * + * \return The hinge loss of each mini-batch + */ +Expression hinge(const Expression& x, const std::vector & indices, float m = 1.0); + +/** + * \ingroup lossoperations + * \brief Batched modifiable hinge loss + * \details A combination of the previous batched and modifiable hinge loss functions, where + * vector ``*pindices`` can be modified. + * + * \param x A mini-batch of vectors with ``pindices->size()`` batch elements + * \param pindices Pointer to the indices of the correct candidates for each batch element + * \param m The margin + * + * \return The hinge loss of each mini-batch + */ +Expression hinge(const Expression& x, const std::vector * pindices, float m = 1.0); + +/** + * \ingroup lossoperations + * \brief Sparsemax + * \details The sparsemax function (Martins et al. 2016), which is similar to softmax, + * but induces sparse solutions where most of the vector elements are zero. + * **Note:** This function is not yet implemented on GPU. + * + * \param x A vector of scores + * + * \return The sparsemax of the scores + */ +Expression sparsemax(const Expression& x); + +/** + * \ingroup lossoperations + * \brief Sparsemax loss + * \details The sparsemax loss function (Martins et al. 
2016), which is similar to + * softmax loss, but induces sparse solutions where most of the vector + * elements are zero. It has a gradient similar to the sparsemax function + * and thus is useful for optimizing when the sparsemax will be used at + * test time. + * **Note:** This function is not yet implemented on GPU. + * + * \param x A vector of scores + * \param target_support The target correct labels. + * + * \return The sparsemax loss of the labels + */ +Expression sparsemax_loss(const Expression& x, const std::vector& target_support); + +/** + * \ingroup lossoperations + * \brief Modifiable sparsemax loss + * \details Similar to the sparsemax loss, but with ptarget_support being a pointer + * to a vector, allowing it to be modified without re-creating the compuation + * graph. + * **Note:** This function is not yet implemented on GPU. + * + * \param x A vector of scores + * \param ptarget_support A pointer to the target correct labels. + * + * \return The sparsemax loss of the labels + */ +Expression sparsemax_loss(const Expression& x, const std::vector* ptarget_support); + +/** + * \ingroup lossoperations + * \brief Squared norm + * \details The squared norm of the values of x: \f$\sum_i x_i^2\f$. + * + * \param x A vector of values + * + * \return The squared norm + */ +Expression squared_norm(const Expression& x); + +/** + * \ingroup lossoperations + * \brief Squared distance + * \details The squared distance between values of ``x`` and ``y``: \f$\sum_i (x_i-y_i)^2\f$. + * + * \param x A vector of values + * \param y Another vector of values + * + * \return The squared distance + */ +Expression squared_distance(const Expression& x, const Expression& y); + +/** + * \ingroup lossoperations + * \brief L1 distance + * \details The L1 distance between values of ``x`` and ``y``: \f$\sum_i |x_i-y_i|\f$. 
+ * + * \param x A vector of values + * \param y Another vector of values + * + * \return The L1 distance + */ +Expression l1_distance(const Expression& x, const Expression& y); + +/** + * \ingroup lossoperations + * \brief Huber distance + * \details The huber distance between values of ``x`` and ``y`` parameterized + * by ``c``, \f$\sum_i L_c(x_i, y_i)\f$ where: + * + * \f$ + * L_c(x, y) = \begin{cases} + * \frac{1}{2}(y - x)^2 & \textrm{for } |y - x| \le c, \\ + * c\, |y - x| - \frac{1}{2}c^2 & \textrm{otherwise.} + * \end{cases} + * \f$ + * + * \param x A vector of values + * \param y Another vector of values + * \param c The parameter of the huber distance parameterizing the cutoff + * + * \return The huber distance + */ +Expression huber_distance(const Expression& x, const Expression& y, float c = 1.345f); + +/** + * \ingroup lossoperations + * \brief Binary log loss + * \details The log loss of a binary decision according to the + * sigmoid function \f$- \sum_i (y_i * ln(x_i) + (1-y_i) * ln(1-x_i)) \f$ + * + * \param x A vector of values + * \param y A vector of true answers + * + * \return The log loss of the sigmoid function + */ +Expression binary_log_loss(const Expression& x, const Expression& y); + +/** + * \ingroup lossoperations + * \brief Pairwise rank loss + * \details A margin-based loss, where every margin violation for each pair of + * values is penalized: \f$\sum_i max(x_i-y_i+m, 0)\f$ + * + * \param x A vector of values + * \param y A vector of true answers + * \param m The margin + * + * \return The pairwise rank loss + */ +Expression pairwise_rank_loss(const Expression& x, const Expression& y, real m = 1.0); + +/** + * \ingroup lossoperations + * \brief Poisson loss + * \details The negative log probability of ``y`` according to a Poisson + * distribution with parameter ``x``. 
Useful in Poisson regression + * where we try to predict the parameters of a Poisson distribution + * to maximize the probability of data ``y``. + * + * \param x The parameter of the Poisson distribution. + * \param y The target value + * + * \return The Poisson loss + */ +Expression poisson_loss(const Expression& x, unsigned y); +/** + * \ingroup lossoperations + * \brief Modifiable Poisson loss + * \details Similar to Poisson loss, but with the target value passed by + * pointer so that it can be modified without re-constructing the + * computation graph. + * + * \param x The parameter of the Poisson distribution. + * \param py A pointer to the target value + * + * \return The Poisson loss + */ +Expression poisson_loss(const Expression& x, const unsigned* py); + +//////////////////////////////////////////////// +// Flow operations // +//////////////////////////////////////////////// + +/** + * \ingroup flowoperations + * \brief Prevent backprop + * \details This node has no effect on the forward pass, but prevents gradients from + * flowing backward during the backward pass. This is useful when there's + * a subgraph for which you don't want loss passed back to the parameters. + * + * \param x The input expression + * + * \return The new expression + */ +Expression nobackprop(const Expression& x); + +/** + * \ingroup flowoperations + * \brief Negative backprop + * \details This node has no effect on the forward pass, but negates the gradient during the backward pass. + * This operation is widely used in adversarial networks. + * + * \param x The input expression + * + * \return An output expression with the same value as the input (only the backward pass is affected) + */ +Expression flip_gradient(const Expression& x); + +/** + * \ingroup flowoperations + * \brief Reshape to another size + * \details This node reshapes a tensor to another size, without changing the + * underlying layout of the data. 
The layout of the data in DyNet is + * column-major, so if we have a 3x4 matrix + * + * \f$ + * \begin{pmatrix} + * x_{1,1} & x_{1,2} & x_{1,3} & x_{1,4} \\ + * x_{2,1} & x_{2,2} & x_{2,3} & x_{2,4} \\ + * x_{3,1} & x_{3,2} & x_{3,3} & x_{3,4} \\ + * \end{pmatrix} + * \f$ + * + * and transform it into a 2x6 matrix, it will be rearranged as: + * + * \f$ + * \begin{pmatrix} + * x_{1,1} & x_{3,1} & x_{2,2} & x_{1,3} & x_{3,3} & x_{2,4} \\ + * x_{2,1} & x_{1,2} & x_{3,2} & x_{2,3} & x_{1,4} & x_{3,4} \\ + * \end{pmatrix} + * \f$ + * + * **Note:** This is O(1) for forward, and O(n) for backward. + * + * \param x The input expression + * \param d The new dimensions + * + * \return The reshaped expression + */ +Expression reshape(const Expression& x, const Dim& d); + +/** + * \ingroup flowoperations + * \brief Transpose a matrix + * \details Transpose a matrix or tensor, or if dims is specified shuffle the + * dimensions arbitrarily. + * **Note:** This is O(1) if either the row or column dimension is 1, + * and O(n) otherwise. + * + * \param x The input expression + * \param dims The dimensions to swap. The ith dimension of the output will be equal + * to the dims[i] dimension of the input. dims must have the same number + * of dimensions as x. + * + * \return The transposed/shuffled expression + */ +Expression transpose(const Expression& x, const std::vector & dims = {1,0}); + +/** + * \ingroup flowoperations + * \brief Select rows + * \details Select a subset of rows of a matrix. + * + * \param x The input expression + * \param rows The rows to extract + * + * \return An expression containing the selected rows + */ +Expression select_rows(const Expression& x, const std::vector& rows); + +/** + * \ingroup flowoperations + * \brief Modifiable select rows + * \details Select a subset of rows of a matrix, where the elements of prows + * can be modified without re-creating the computation graph. 
+ * + * \param x The input expression + * \param prows A pointer to the rows to extract + * + * \return An expression containing the selected rows + */ +Expression select_rows(const Expression& x, const std::vector* prows); + +/** + * \ingroup flowoperations + * \brief Select columns + * \details Select a subset of columns of a matrix. select_cols is more + * efficient than select_rows since DyNet uses column-major order. + * + * \param x The input expression + * \param cols The columns to extract + * + * \return An expression containing the selected columns + */ +Expression select_cols(const Expression& x, const std::vector& cols); + +/** + * \ingroup flowoperations + * \brief Modifiable select columns + * \details Select a subset of columns of a matrix, where the elements of pcols + * can be modified without re-creating the computation graph. + * + * \param x The input expression + * \param pcols A pointer to the columns to extract + * + * \return An expression containing the selected columns + */ +Expression select_cols(const Expression& x, const std::vector* pcols); + +/** + * \ingroup flowoperations + * \brief Sum over minibatches + * \details Sum an expression that consists of multiple minibatches into one of + * equal dimension but with only a single minibatch. This is useful + * for summing loss functions at the end of minibatch training. 
+ * + * \param x The input mini-batched expression + * + * \return An expression with a single batch + */ +Expression sum_batches(const Expression& x); + +/** + * \ingroup flowoperations + * \brief Compute moment over minibatches + * \details Compute the moment of order \f$r\f$, \f$\frac 1 n\sum_{i=1}^nx_i^r\f$ along the batch dimension + * + * \param x The input mini-batched expression + * \param r Order of the moment + * + * \return An expression with a single batch + */ +Expression moment_batches(const Expression& x, unsigned r); + + +/** + * \ingroup flowoperations + * \brief Compute mean over minibatches + * \details Computes \f$\frac 1 n\sum_{i=1}^nx_i\f$ along the batch dimension + * + * \param x The input mini-batched expression + * + * \return An expression with a single batch + */ +Expression mean_batches(const Expression& x); + +/** + * \ingroup flowoperations + * \brief Compute standard deviation over minibatches + * \details Computes \f$\frac 1 n\sum_{i=1}^n(x_i -\mu)^2\f$ where \f$\mu=\frac 1 n\sum_{i=1}^nx_i\f$ along the batch dimension + * + * \param x The input mini-batched expression + * + * \return A scalar expression (with a potential batch dimension) + */ +Expression std_batches(const Expression& x); + +/** + * \ingroup flowoperations + * \brief Compute standard deviation along an arbitrary dimension + * \details Computes \f$\frac 1 n\sum_{i=1}^n(x_i -\mu)^2\f$ where \f$\mu=\frac 1 n\sum_{i=1}^nx_i\f$ along an arbitrary dimension + * + * \param x The input mini-batched expression + * \param d Dimension along which to reduce + * + * \return A scalar expression (with a potential batch dimension) + */ +Expression std_dim(const Expression& x, unsigned d); + +/** + * \ingroup flowoperations + * \brief Compute moment along a specific dimension + * \details Compute the moment of order \f$r\f$, \f$\frac 1 n\sum_{i=1}^nx_i^r\f$ along a specific dimension + * + * \param x The input mini-batched expression + * \param d Dimension along which to reduce + * 
\param r Order of the moment + * + * \return An expression with one less dimension + */ +Expression moment_dim(const Expression& x, unsigned d, unsigned r); +/** + * \ingroup flowoperations + * \brief Compute mean along a specific dimension + * \details Computes \f$\frac 1 n\sum_{i=1}^nx_i\f$ along a specific dimension + * + * \param x The input mini-batched expression + * \param d Dimension along which to reduce + * + * \return An expression with one less dimension + */ +Expression mean_dim(const Expression& x, unsigned d); + + +/** + * \ingroup flowoperations + * \brief Pick element + * \details Pick a single element/row/column/sub-tensor from an expression. + * This will result in the dimension of the tensor being reduced + * by 1. + * + * \param x The input expression + * \param v The index of the element to select + * \param d The dimension along which to choose the element + * + * \return The value of x[v] along dimension d + */ +Expression pick(const Expression& x, unsigned v, unsigned d = 0); + +/** + * \ingroup flowoperations + * \brief Batched pick + * \details Pick elements from multiple batches. + * + * \param x The input expression + * \param v A vector of indicies to choose, one for each batch in the + * input expression. + * \param d The dimension along which to choose the elements + * + * \return A mini-batched expression containing the picked elements + */ +Expression pick(const Expression& x, const std::vector & v, unsigned d = 0); + +/** + * \ingroup flowoperations + * \brief Modifiable pick element + * \details Pick a single element from an expression, where the index is + * passed by pointer so we do not need to re-create the computation + * graph every time. 
+ * + * \param x The input expression + * \param pv Pointer to the index of the element to select + * \param d The dimension along which to choose the elements + * + * \return The value of x[*pv] + */ +Expression pick(const Expression& x, const unsigned * pv, unsigned d = 0); + +/** + * \ingroup flowoperations + * \brief Modifiable batched pick element + * \details Pick multiple elements from an input expression, where the indices + * are passed by pointer so we do not need to re-create the computation + * graph every time. + * + * \param x The input expression + * \param pv A pointer to a vector of indices to choose + * \param d The dimension along which to choose the elements + * + * \return A mini-batched expression containing the picked elements + */ +Expression pick(const Expression& x, const std::vector * pv, unsigned d = 0); + +/** + * \ingroup flowoperations + * \brief Pick range of elements + * \details Pick a range of elements from an expression. + * + * \param x The input expression + * \param s The start index + * \param e The end index + * \param d The dimension along which to pick + * + * \return The elements of x in the range from s to e along dimension d + */ +Expression pick_range(const Expression& x, unsigned s, unsigned e, unsigned d = 0); +// DEPRECATED +Expression pickrange(const Expression& x, unsigned s, unsigned e); + +/** + * \ingroup flowoperations + * \brief Pick batch element. + * \details Pick batch element from a batched expression. 
For a Tensor with 3 batch elements: + * + * \f$ + * \begin{pmatrix} + * x_{1,1,1} & x_{1,1,2} \\ + * x_{1,2,1} & x_{1,2,2} \\ + * \end{pmatrix} + * \begin{pmatrix} + * x_{2,1,1} & x_{2,1,2} \\ + * x_{2,2,1} & x_{2,2,2} \\ + * \end{pmatrix} + * \begin{pmatrix} + * x_{3,1,1} & x_{3,1,2} \\ + * x_{3,2,1} & x_{3,2,2} \\ + * \end{pmatrix} + * \f$ + * + * pick_batch_elem(t, 1) will return a Tensor of + * + * \f$ + * \begin{pmatrix} + * x_{2,1,1} & x_{2,1,2} \\ + * x_{2,2,1} & x_{2,2,2} \\ + * \end{pmatrix} + * \f$ + * + * \param x The input expression + * \param v The index of the batch element to be picked. + * + * \return The expression of picked batch element. The picked element is a tensor + * whose `bd` equals to one. + */ +Expression pick_batch_elem(const Expression& x, unsigned v); + +/** + * \ingroup flowoperations + * \brief (Modifiable) Pick batch elements. + * \details Pick several batch elements from a batched expression. For a Tensor with 3 batch elements: + * + * \f$ + * \begin{pmatrix} + * x_{1,1,1} & x_{1,1,2} \\ + * x_{1,2,1} & x_{1,2,2} \\ + * \end{pmatrix} + * \begin{pmatrix} + * x_{2,1,1} & x_{2,1,2} \\ + * x_{2,2,1} & x_{2,2,2} \\ + * \end{pmatrix} + * \begin{pmatrix} + * x_{3,1,1} & x_{3,1,2} \\ + * x_{3,2,1} & x_{3,2,2} \\ + * \end{pmatrix} + * \f$ + * + * pick_batch_elems(t, {2, 3}) will return a Tensor of with 2 batch elements: + * + * \f$ + * \begin{pmatrix} + * x_{2,1,1} & x_{2,1,2} \\ + * x_{2,2,1} & x_{2,2,2} \\ + * \end{pmatrix} + * \begin{pmatrix} + * x_{3,1,1} & x_{3,1,2} \\ + * x_{3,2,1} & x_{3,2,2} \\ + * \end{pmatrix} + * \f$ + * + * \param x The input expression + * \param v A vector of indicies of the batch elements to be picked. + * + * \return The expression of picked batch elements. The batch elements is a tensor + * whose `bd` equals to the size of vector `v`. + */ +Expression pick_batch_elems(const Expression& x, const std::vector & v); + +/** + * \ingroup flowoperations + * \brief Pick batch element. 
+ * \details Pick batch element from a batched expression. + * \param x The input expression + * \param v A pointer to the index of the correct element to be picked. + * + * \return The expression of picked batch element. The picked element is a tensor + * whose `bd` equals to one. + */ +Expression pick_batch_elem(const Expression& x, const unsigned* v); + +/** + * \ingroup flowoperations + * \brief Pick batch elements. + * \details Pick several batch elements from a batched expression. + * \param x The input expression + * \param pv A pointer to the vector of indices of the batch elements to be picked + * + * \return The expression of picked batch elements. The result is a tensor + * whose `bd` equals the size of the vector pointed to by `pv`. + */ +Expression pick_batch_elems(const Expression& x, const std::vector * pv); + +/** + * \ingroup flowoperations + * \brief Concatenate list of expressions to a single batched expression + * \details Perform a concatenation of several expressions along the batch dimension. + * All expressions must have the same shape except for the batch dimension. + * + * \param xs The input expressions + * + * \return The expression with the batch dimensions concatenated + */ +inline Expression concatenate_to_batch(const std::initializer_list& xs) { return detail::f(xs); } +template +inline Expression concatenate_to_batch(const T& xs) { return detail::f(xs); } + +/** + * \ingroup flowoperations + * \brief Concatenate columns + * \details Perform a concatenation of the columns in multiple expressions. + * All expressions must have the same number of rows. + * + * \param xs The input expressions + * + * \return The expression with the columns concatenated + */ +inline Expression concatenate_cols(const std::initializer_list& xs) { return detail::f(xs, 1); } +template +inline Expression concatenate_cols(const T& xs) { return detail::f(xs, 1); } + +/** + * \ingroup flowoperations + * \brief Concatenate + * \details Perform a concatenation of multiple expressions along + * a particular dimension.
+ * All expressions must have the same dimensions except for + * the dimension to be concatenated (rows by default). + * + * \param xs The input expressions + * \param d The dimension along which to perform concatenation + * + * \return The expression with the specified dimension concatenated + */ +inline Expression concatenate(const std::initializer_list& xs, unsigned d = 0) { return detail::f(xs, d); } +template +inline Expression concatenate(const T& xs, unsigned d = 0) { return detail::f(xs, d); } + +/** + * \ingroup flowoperations + * \brief Max out through a dimension + * \details Select out an element/row/column/sub-tensor from an expression, + * with maximum value along a given dimension. + * This will result in the dimension of the tensor being reduced + * by 1. + * + * \param x The input expression + * \param d The dimension along which to choose the element + * + * \return An expression of sub-tensor with max value along dimension d + */ +Expression max_dim(const Expression& x, unsigned d = 0); + +/** + * \ingroup flowoperations + * \brief Min out through a dimension + * \details Select out an element/row/column/sub-tensor from an expression, + * with minimum value along a given dimension. + * This will result in the dimension of the tensor being reduced + * by 1. + * + * \param x The input expression + * \param d The dimension along which to choose the element + * + * \return An expression of sub-tensor with min value along dimension d + */ +Expression min_dim(const Expression& x, unsigned d = 0); + + +//////////////////////////////////////////////// +// Noise operations // +//////////////////////////////////////////////// + +/** + * \ingroup noiseoperations + * \brief Gaussian noise + * \details Add gaussian noise to an expression.
+ * + * \param x The input expression + * \param stddev The standard deviation of the gaussian + * + * \return The noised expression + */ +Expression noise(const Expression& x, real stddev); + +/** + * \ingroup noiseoperations + * \brief Dropout + * \details + * With a fixed probability, drop out (set to zero) nodes in the input + * expression, and **scale** the remaining nodes by 1/p. Note that there are + * [two kinds of dropout](http://cs231n.github.io/neural-networks-2/#reg): + * - *Regular dropout:* where we perform dropout at training time and then\n + * scale outputs by p at test time. + * - *Inverted dropout:* where we perform dropout and scaling at training\n + * time, and do not need to do anything at test time. + * DyNet implements the latter, so you only need to apply dropout at training + * time, and do not need to perform scaling at test time. + * + * NOTE(review): if p is the probability of dropping a node, the usual + * inverted-dropout scale factor is 1/(1-p) rather than 1/p; confirm against + * the implementation before relying on the factor quoted above. + * + * \param x The input expression + * \param p The dropout probability + * + * \return The dropped out expression + */ +Expression dropout(const Expression& x, real p); + +/** + * \ingroup noiseoperations + * \brief Dropout along a specific dimension + * \details Identical to the dropout operation except the dropout mask is the same across one dimension.
Use this if you want to drop columns or lines in a matrix for example + * + * For now this only supports tensors of order <= 3 (with or without batch dimension) + * + * \param x The input expression + * \param d The dimension along which to drop + * \param p The dropout probability + * + * \return The dropped out expression + */ +Expression dropout_dim(const Expression& x, unsigned d, real p); + +/** + * \ingroup noiseoperations + * \brief Dropout entire elements of a minibatch + * \details Identical to the dropout operation except entire batch elements are dropped + * + * \param x The input expression + * \param p The dropout probability + * + * \return The dropped out expression + */ +Expression dropout_batch(const Expression& x, real p); + +/** + * \ingroup noiseoperations + * \brief Block dropout + * \details Identical to the dropout operation, but either drops out *all* + * or *no* values in the expression, as opposed to making a decision + * about each value individually. + * + * \param x The input expression + * \param p The block dropout probability + * + * \return The block dropout expression + */ +Expression block_dropout(const Expression& x, real p); + +//////////////////////////////////////////////// +// Convolution operations // +//////////////////////////////////////////////// + +//Expression conv1d_narrow(const Expression& x, const Expression& f); +//Expression conv1d_wide(const Expression& x, const Expression& f); +Expression filter1d_narrow(const Expression& x, const Expression& f); +Expression kmax_pooling(const Expression& x, unsigned k, unsigned d = 1); +Expression fold_rows(const Expression& x, unsigned nrows = 2); +Expression sum_dim(const Expression& x, unsigned d); +Expression sum_cols(const Expression& x); +Expression sum_rows(const Expression& x); +Expression average_cols(const Expression& x); +Expression kmh_ngram(const Expression& x, unsigned n); + + +/** + * \ingroup convolutionoperations + * \brief conv2d without bias + * \details + * 
2D convolution operator without bias parameters. + * 'VALID' and 'SAME' convolutions are supported. + * Think about when stride is 1, the distinction: + * - *SAME*: output size is the same with input size. To do so, one needs to pad the input so the filter can sweep outside of the input maps. + * - *VALID*: output size shrinks by filter_size - 1, and the filters always sweep at valid positions inside the input maps. No padding needed. + * + * In detail, assume: + * - Input feature maps: (XH x XW x XC) x N + * - Filters: FH x FW x XC x FC, 4D tensor + * - Strides: strides[0] and strides[1] are row (h) and col (w) stride, respectively. + * + * For the *SAME* convolution: the output height (YH) and width (YW) are computed as: + * - YH = ceil(float(XH) / float(strides[0])) + * - YW = ceil(float(XW) / float(strides[1])) + * and the paddings are computed as: + * - pad_along_height = max((YH - 1) * strides[0] + FH - XH, 0) + * - pad_along_width = max((YW - 1) * strides[1] + FW - XW, 0) + * - pad_top = pad_along_height / 2 + * - pad_bottom = pad_along_height - pad_top + * - pad_left = pad_along_width / 2 + * - pad_right = pad_along_width - pad_left + * + * For the *VALID* convolution: the output height (YH) and width (YW) are computed as: + * - YH = ceil(float(XH - FH + 1) / float(strides[0])) + * - YW = ceil(float(XW - FW + 1) / float(strides[1])) + * and the paddings are always zeros. 
+ * + * \param x The input feature maps: (H x W x Ci) x N (ColMaj), 3D tensor with an optional batch dimension + * \param f 2D convolution filters: H x W x Ci x Co (ColMaj), 4D tensor + * \param stride the row and column strides + * \param is_valid 'VALID' convolution or 'SAME' convolution, default is True ('VALID') + * + * \return The output feature maps (H x W x Co) x N, 3D tensor with an optional batch dimension + */ +Expression conv2d(const Expression& x, const Expression& f, const std::vector& stride, bool is_valid = true); + +/** + * \ingroup convolutionoperations + * \brief conv2d with bias + * \details + * 2D convolution operator with bias parameters. + * 'VALID' and 'SAME' convolutions are supported. + * Think about when stride is 1, the distinction: + * - *SAME*: output size is the same with input size. To do so, one needs to pad the input so the filter can sweep outside of the input maps. + * - *VALID*: output size shrinks by filter_size - 1, and the filters always sweep at valid positions inside the input maps. No padding needed. + * + * In detail, assume: + * - Input feature maps: XH x XW x XC x N + * - Filters: FH x FW x XC x FC + * - Strides: strides[0] and strides[1] are row (h) and col (w) stride, respectively. 
+ * + * For the *SAME* convolution: the output height (YH) and width (YW) are computed as: + * - YH = ceil(float(XH) / float(strides[0])) + * - YW = ceil(float(XW) / float(strides[1])) + * and the paddings are computed as: + * - pad_along_height = max((YH - 1) * strides[0] + FH - XH, 0) + * - pad_along_width = max((YW - 1) * strides[1] + FW - XW, 0) + * - pad_top = pad_along_height / 2 + * - pad_bottom = pad_along_height - pad_top + * - pad_left = pad_along_width / 2 + * - pad_right = pad_along_width - pad_left + * + * For the *VALID* convolution: the output height (YH) and width (YW) are computed as: + * - YH = ceil(float(XH - FH + 1) / float(strides[0])) + * - YW = ceil(float(XW - FW + 1) / float(strides[1])) + * and the paddings are always zeros. + * + * \param x The input feature maps: (H x W x Ci) x N (ColMaj), 3D tensor with an optional batch dimension + * \param f 2D convolution filters: H x W x Ci x Co (ColMaj), 4D tensor + * \param b The bias (1D: Ci) + * \param stride the row and column strides + * \param is_valid 'VALID' convolution or 'SAME' convolution, default is True ('VALID') + * + * \return The output feature maps (H x W x Co) x N, 3D tensor with an optional batch dimension + */ +Expression conv2d(const Expression& x, const Expression& f, const Expression& b, const std::vector& stride, bool is_valid = true); + +//////////////////////////////////////////////// +// Tensor operations // +//////////////////////////////////////////////// + +/** + * \ingroup tensoroperations + * \brief Contracts a rank 3 tensor and a rank 1 tensor into a rank 2 tensor + * \details The resulting tensor \f$z\f$ has coordinates \f$z_ij = \sum_k x_{ijk} y_k\f$ + * + * \param x Rank 3 tensor + * \param y Vector + * + * \return Matrix + */ +Expression contract3d_1d(const Expression& x, const Expression& y); +// z_i = x_ijk * y_k * z_j (+ b_i) +/** + * \ingroup tensoroperations + * \brief Contracts a rank 3 tensor and two rank 1 tensor into a rank 1 tensor + * \details This is 
the equivalent of calling `contract3d_1d` and then performing a matrix vector multiplication. + * + * The resulting tensor \f$t\f$ has coordinates \f$t_i = \sum_{j,k} x_{ijk} y_k z_j\f$ + * + * \param x Rank 3 tensor + * \param y Vector + * \param z Vector + * \return Vector + */ +Expression contract3d_1d_1d(const Expression& x, const Expression& y, const Expression& z); +/** + * \ingroup tensoroperations + * \brief Same as `contract3d_1d_1d` with an additional bias parameter + * \details This is the equivalent of calling `contract3d_1d` and then performing an affine transform. + * + * The resulting tensor \f$t\f$ has coordinates \f$t_i = b_i + \sum_{j,k} x_{ijk} y_k z_j\f$ + * + * \param x Rank 3 tensor + * \param y Vector + * \param z Vector + * \param b Bias vector + * \return Vector + */ +Expression contract3d_1d_1d(const Expression& x, const Expression& y, const Expression& z, const Expression& b); +// z_ij = x_ijk * y_k + b_ij +/** + * \ingroup tensoroperations + * \brief Same as `contract3d_1d` with an additional bias parameter + * \details The resulting tensor \f$z\f$ has coordinates \f$z_{ij} = b_{ij}+\sum_k x_{ijk} y_k\f$ + * + * \param x Rank 3 tensor + * \param y Vector + * \param b Bias matrix + * \return Matrix + */ +Expression contract3d_1d(const Expression& x, const Expression& y, const Expression& b); + + +//////////////////////////////////////////////// +// Linear algebra operations // +//////////////////////////////////////////////// + +/** + * \ingroup linalgoperations + * \brief Matrix Inverse + * \details Takes the inverse of a matrix (not implemented on GPU yet, although + * contributions are welcome: https://github.com/clab/dynet/issues/158). + * Note that back-propagating through an inverted matrix can also be the + * source of stability problems sometimes. 
+ * + * \param x A square matrix + * + * \return The inverse of the matrix + */ +Expression inverse(const Expression& x); + +/** + * \ingroup linalgoperations + * \brief Log determinant + * \details Takes the log of the determinant of a matrix. + * (not implemented on GPU yet, although + * contributions are welcome: https://github.com/clab/dynet/issues/158). + * + * \param x A square matrix + * + * \return The log of its determinant + */ +Expression logdet(const Expression& x); + +/** + * \ingroup linalgoperations + * \brief Trace of Matrix Product + * \details Takes the trace of the product of matrices. + * (not implemented on GPU yet, although + * contributions are welcome: https://github.com/clab/dynet/issues/158). + * + * \param x A matrix + * \param y Another matrix + * + * \return trace(x * y) + */ +Expression trace_of_product(const Expression& x, const Expression& y); + +//////////////////////////////////////////////// +// Normalization operations // +//////////////////////////////////////////////// + +/** + * \ingroup normoperations + * \brief Layer normalization + * \details Performs layer normalization : + * + * \f$ + * \begin{split} + * \mu &= \frac 1 n \sum_{i=1}^n x_i\\ + * \sigma &= \sqrt{\frac 1 n \sum_{i=1}^n (x_i-\mu)^2}\\ + * y&=\frac {\boldsymbol{g}} \sigma \circ (\boldsymbol{x}-\mu) + \boldsymbol{b}\\ + * \end{split} + * \f$ + * + * Reference : [Ba et al., 2016](http://arxiv.org/abs/1607.06450) + * + * \param x Input expression (possibly batched) + * \param g Gain (same dimension as x, no batch dimension) + * \param b Bias (same dimension as x, no batch dimension) + * \return An expression of the same dimension as `x` + */ +Expression layer_norm(const Expression& x, const Expression& g, const Expression& b); +} +// Because expressions are now such a fundamental part of DyNet it doesn't +// make much sense to keep them in separate namespaces, so we import expr +// to the dynet namespace.
+using namespace expr; +} + +#endif diff --git a/thirdparty/dynet/dynet/fast-lstm.cc b/thirdparty/dynet/dynet/fast-lstm.cc new file mode 100644 index 000000000..7758efb74 --- /dev/null +++ b/thirdparty/dynet/dynet/fast-lstm.cc @@ -0,0 +1,217 @@ +#include "dynet/fast-lstm.h" + +#include +#include +#include + +#include "dynet/nodes.h" + +using namespace std; +using namespace dynet::expr; + +namespace dynet { + +enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; + +/* +FastLSTM replaces the matrices from cell to other units, by diagonal matrices. +Namely: C2O, C2I. +*/ + +FastLSTMBuilder::FastLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) : layers(layers) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_c2i = model.add_parameters({hidden_dim, 1}); + Parameter p_bi = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // o + Parameter p_x2o = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2o = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_c2o = model.add_parameters({hidden_dim, 1}); + Parameter p_bo = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // c + Parameter p_x2c = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2c = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bc = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; + params.push_back(ps); + } // layers +} + +void FastLSTMBuilder::new_graph_impl(ComputationGraph& cg){ + param_vars.clear(); + + for (unsigned i = 0; i < layers; ++i){ + auto& p = params[i]; + + //i + Expression i_x2i 
= parameter(cg,p[X2I]); + Expression i_h2i = parameter(cg,p[H2I]); + Expression i_c2i = parameter(cg,p[C2I]); + Expression i_bi = parameter(cg,p[BI]); + //o + Expression i_x2o = parameter(cg,p[X2O]); + Expression i_h2o = parameter(cg,p[H2O]); + Expression i_c2o = parameter(cg,p[C2O]); + Expression i_bo = parameter(cg,p[BO]); + //c + Expression i_x2c = parameter(cg,p[X2C]); + Expression i_h2c = parameter(cg,p[H2C]); + Expression i_bc = parameter(cg,p[BC]); + + vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; + param_vars.push_back(vars); + } +} + +// layout: 0..layers = c +// layers+1..2*layers = h +void FastLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "FastLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state and cell for each layer). However, for " << layers << + " layers, " << hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } +} + +// TO DO - Make this correct +// Copied c from the previous step (otherwise c.size()< h.size()) +// Also is creating a new step something we want? +// wouldn't overwriting the current one be better? 
+/**
+ * Append a new time step whose hidden states are taken from `h_new`.
+ *
+ * The cell states of the new step are copied from the most recently stored
+ * step (`c[t - 1]`), so at least one step must already exist when this is
+ * called. `prev` is not consulted by this implementation.
+ *
+ * NOTE(review): the argument check lets an empty `h_new` through, after which
+ * `h_new[i]` is still read for every layer -- confirm that callers never pass
+ * an empty vector when `layers > 0`.
+ *
+ * \param prev Index of the previous state (unused here)
+ * \param h_new One hidden-state expression per layer
+ *
+ * \return The hidden state of the last layer at the new time step
+ */
+Expression FastLSTMBuilder::set_h_impl(int prev, const vector& h_new) {
+  DYNET_ARG_CHECK(!(h_new.size() && h_new.size() != layers),
+                  "FastLSTMBuilder::set_h expects as many inputs as layers, "
+                  "but got " << h_new.size() << " inputs for " << layers << " layers");
+  const unsigned t = h.size();
+  h.push_back(vector(layers));
+  c.push_back(vector(layers));
+  for (unsigned i = 0; i < layers; ++i) {
+    Expression h_i = h_new[i];
+    Expression c_i = c[t - 1][i];  // carry the cell state over unchanged
+    h[t][i] = h_i;
+    c[t][i] = c_i;
+  }
+  return h[t].back();
+}
+/**
+ * Append a new time step whose state is taken from `s_new`.
+ *
+ * `s_new` is either {new_c[0],...,new_c[n]} (cell states only, in which case
+ * the hidden states are copied from the previous step) or
+ * {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} (cell states followed by
+ * hidden states). Any other size fails the argument check.
+ * `prev` is not consulted by this implementation.
+ *
+ * \param prev Index of the previous state (unused here)
+ * \param s_new `layers` cell states, optionally followed by `layers` hidden states
+ *
+ * \return The hidden state of the last layer at the new time step
+ */
+Expression FastLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) {
+  // DYNET_ARG_CHECK takes the condition that must HOLD (compare
+  // start_new_sequence_impl and copy); the accepted sizes are stated directly.
+  DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers,
+                  "FastLSTMBuilder::set_s expects either as many inputs or twice as many "
+                  "inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers");
+  bool only_c = s_new.size() == layers;
+  const unsigned t = c.size();
+  h.push_back(vector(layers));
+  c.push_back(vector(layers));
+  for (unsigned i = 0; i < layers; ++i) {
+    Expression h_i = only_c ?
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); +} + + +Expression FastLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < 0) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + } else { // t > 0 + i_h_tm1 = h[prev][i]; + i_c_tm1 = c[prev][i]; + } + // input + Expression i_ait; + if (has_prev_state) { +// i_ait = vars[BI] + vars[X2I] * in + vars[H2I]*i_h_tm1 + cmult(vars[C2I], i_c_tm1); + i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1}) + + cmult(vars[C2I], i_c_tm1); + } else { +// i_ait = vars[BI] + vars[X2I] * in; + i_ait = affine_transform({vars[BI], vars[X2I], in}); + } + Expression i_it = logistic(i_ait); + // forget + Expression i_ft = 1.f - i_it; + // write memory cell + Expression i_awt; + if (has_prev_state) +// i_awt = vars[BC] + vars[X2C] * in + vars[H2C]*i_h_tm1; + i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); + else +// i_awt = vars[BC] + vars[X2C] * in; + i_awt = affine_transform({vars[BC], vars[X2C], in}); + Expression i_wt = tanh(i_awt); + // output + if (has_prev_state) { + Expression i_nwt = cmult(i_it,i_wt); + Expression i_crt = cmult(i_ft,i_c_tm1); + ct[i] = i_crt + i_nwt; + } else { + ct[i] = cmult(i_it,i_wt); + } + + Expression i_aot; + if (has_prev_state) { +// i_aot = vars[BO] + vars[X2O] * in + vars[H2O] * i_h_tm1 + cmult(vars[C2O], ct[i]); + i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1}) + + cmult(vars[C2O], ct[i]); + } + else { +// 
i_aot = vars[BO] + vars[X2O] * in; + i_aot = affine_transform({vars[BO], vars[X2O], in}); + } + Expression i_ot = logistic(i_aot); + Expression ph_t = tanh(ct[i]); + in = ht[i] = cmult(i_ot,ph_t); + } + return ht.back(); +} + +void FastLSTMBuilder::copy(const RNNBuilder & rnn) { + const FastLSTMBuilder & rnn_lstm = (const FastLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy FastLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for(size_t i = 0; i < params.size(); ++i) + for(size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/fast-lstm.h b/thirdparty/dynet/dynet/fast-lstm.h new file mode 100644 index 000000000..0d841bfea --- /dev/null +++ b/thirdparty/dynet/dynet/fast-lstm.h @@ -0,0 +1,68 @@ +#ifndef DYNET_FAST_LSTM_H_ +#define DYNET_FAST_LSTM_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" + +using namespace dynet::expr; + +namespace dynet { + +class Model; + +/* +FastLSTM replaces the matrices from cell to other units, by diagonal matrices. +*/ +struct FastLSTMBuilder : public RNNBuilder { + FastLSTMBuilder() = default; + explicit FastLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model); + + Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for(auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? 
c0 : c[i]); + for(auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + public: + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/functors.h b/thirdparty/dynet/dynet/functors.h new file mode 100644 index 000000000..b3c8d7b68 --- /dev/null +++ b/thirdparty/dynet/dynet/functors.h @@ -0,0 +1,317 @@ +#ifndef DYNET_GPU_FUNCTORS_H +#define DYNET_GPU_FUNCTORS_H + +#include +#include +#include + +#if HAVE_CUDA +# define DYNET_DEVICE_FUNC __device__ +# define DYNET_DEVICE_MIN 1.175494351e-38f +#else +# define DYNET_DEVICE_FUNC +# define DYNET_DEVICE_MIN std::numeric_limits::min() +#endif + +// these functions are used both in CPU and in GPU computation +// this file may be compiled with NVCC or a standard C++ tool. +// if you need a new elementwise (nullary, unary, binary...) 
+// functor, this is the place for it +// +// note: also see xfunctors.h - functors implemented there can +// use Eigen's internal support for vectorized operations which +// can give faster performance on some hardware + +namespace dynet { + +struct FHuberForward { + FHuberForward(float c) : c(c) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + const float a = fabs(x); + return (a < c) ? x*x : c*(2*a - c); + } + const float c; +}; + +// template int sgn(T val) { +// return ((T(0) < val) - (val < T(0))); +// } + +struct FL1Backward { + FL1Backward(float d) : d(d) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return ((0.f < x) - (x < 0.f)) * d; + } + const float d; +}; + +struct FHuberBackward { + FHuberBackward(float c, float dEdf) : c(c), d(dEdf) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + const float a = fabs(x); + return (2 * d) * ((a < c) ? x : c * ((0.f < x) - (x < 0.f))); + } + const float c; + const float d; +}; + +struct FProduct { + DYNET_DEVICE_FUNC inline float operator()(float a, float b) const { + return a * b; + } +}; + +struct FQuotient { + DYNET_DEVICE_FUNC inline float operator()(float a, float b) const { + return a / b; + } +}; + +struct FConstantPlus { + FConstantPlus(float c) : c(c) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return c + x; + } + float c; +}; + +struct FConstantMinus { + FConstantMinus(float c) : c(c) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return c - x; + } + float c; +}; + +struct FNegate { + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return -x; + } +}; + +struct FErf { + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return erff(x); + } +}; + +struct FTanh { + DYNET_DEVICE_FUNC inline float operator()(float x) const { +#ifdef FAST_TANH + float x2 = x * x; + float a = x * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2))); + float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 
28.0f)); + return a / b; +#else + return tanhf(x); +#endif + } +}; + +struct FLog { + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return logf(x); + } +}; + +struct FMaxBackwardInv { + DYNET_DEVICE_FUNC inline float operator()(float u, float d) const { + return (1.f - u) * d; + } +}; + +struct FSqrtBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return d / (2.f * t); + } +}; + +struct FErfBackward { + DYNET_DEVICE_FUNC inline float operator()(float x, float d) const { + return 1.1283791670955125738961589f * expf(-x * x) * d; + } +}; + +struct FTanhBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return (1.f - t * t) * d; + } +}; + +struct FLogBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return (1.f / t) * d; + } +}; + +struct FPairwiseRankLoss { + FPairwiseRankLoss(float m) : margin(m) {} + DYNET_DEVICE_FUNC float operator()(float a, float b) const { + float d = margin - a + b; + return d > 0.f ? d : 0.f; + } + float margin; +}; + +struct FRectifyBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return (t) ? d : 0.f; + } +}; + +struct FRectifyNegateBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return (t) ? 
-d : 0.f; + } +}; + +struct FSoftmaxNormalize { + explicit FSoftmaxNormalize(float logz) : logz(logz) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return expf(x - logz); + } + float logz; +}; + +struct FSoftmaxBackward { + explicit FSoftmaxBackward(float off_diag_sum) : off_diag_sum(off_diag_sum) {} + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return (off_diag_sum + d) * t; + } + float off_diag_sum; +}; + +struct FNegLogSoftmaxBackward { + FNegLogSoftmaxBackward(float lz, float err) : logz(lz), d(err) {} + DYNET_DEVICE_FUNC inline float operator()(float t) const { + return expf(t - logz) * d; + } + float logz; + float d; +}; + +struct FPtrNegLogSoftmaxBackward { + FPtrNegLogSoftmaxBackward(const float* lz, const float* err) : logz(lz), d(err) {} + DYNET_DEVICE_FUNC inline float operator()(float t) const { + return expf(t - *logz) * *d; + } + const float* logz; + const float* d; +}; + +struct FLogSoftmaxNormalize { + explicit FLogSoftmaxNormalize(float logz) : logz(logz) {} + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return x - logz; + } + float logz; +}; + +struct FWeightedError { + float operator()(float t, float d) const { + return expf(t) * d / expf(t); + } +}; + +struct FLogSoftmaxBackward { + explicit FLogSoftmaxBackward(float off_diag_sum) : off_diag_sum(off_diag_sum) {} + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return off_diag_sum * expf(t) + d; + //return (off_diag_sum + d) * t; + } + float off_diag_sum; +}; + +struct FRectify { + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return (x > 0.f) ? x : 0.f; + } +}; + +struct FSoftSign { + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return x / (1.f + (x < 0.f ? -x : x)); + } +}; + +struct FSoftSignBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + float a = 1.f - (t < 0.f ? 
-t : t); + return a * a * d; + } +}; + +struct FLogisticSigmoid { + DYNET_DEVICE_FUNC inline float operator()(float x) const { + return 1.f / (1.f + expf(-x)); + } +}; + +struct FLogisticSigmoidBackward { + DYNET_DEVICE_FUNC inline float operator()(float t, float d) const { + return (1.f - t) * t * d; + } +}; + +struct FSqDist { + DYNET_DEVICE_FUNC inline float operator()(float a, float b) const { + float d = a - b; + return d * d; + } +}; + +struct FEuclideanBackward { + FEuclideanBackward(int i, const float* s) : i(i), scalar(s) {} + DYNET_DEVICE_FUNC inline float operator()(float a, float b) const { + return (i == 0 ? 2.f : -2.f) * (*scalar) * (a - b); + } + int i; + const float* scalar; +}; + +struct FL2SGDUpdate { + FL2SGDUpdate(float l, float s) : lambda(l), scale(-s) {} + DYNET_DEVICE_FUNC inline float operator()(float x, float g) const { + return scale * g - x * lambda; + } + float lambda; + float scale; +}; + +struct FBinaryLogLoss { + DYNET_DEVICE_FUNC inline float operator()(float x, float x_true) const { + if (x_true == 1.f) { + if (x == 0.f) return -1.f * log(DYNET_DEVICE_MIN); + return -1.f * log(x); + } + else if (x_true == 0.f) { + if (x == 1.f) return -1.f * log(DYNET_DEVICE_MIN); + else return (x_true - 1.f) * log1p(-x); + } + else { + if (x == 0.f) return -1.f * log(DYNET_DEVICE_MIN); + else if (x == 1.f) return -1.f * log(DYNET_DEVICE_MIN); + else return -1.f * (x_true * log(x) + (1.f - x_true) * log1p(-x)); + } + } +}; + +struct FBinaryLogLossBackward { + explicit FBinaryLogLossBackward(float d) : d(d) {} + DYNET_DEVICE_FUNC inline float operator()(float x, float x_true) const { + if (x == x_true) return 0; + if (x == 0.f) x = DYNET_DEVICE_MIN; + if (x == 1.f) x = 0.9999999f; + if (x_true == 1.f) { + return d * -x_true / x; + } else if (x_true == 0.f) { + return d * (1.f - x_true) / (1.f - x); + } + return d * ((1.f - x_true) / (1.f - x) + (-x_true / x)); + } + float d; +}; + +} // namespace dynet + +#endif diff --git 
a/thirdparty/dynet/dynet/globals.cc b/thirdparty/dynet/dynet/globals.cc new file mode 100644 index 000000000..c9bd6ccad --- /dev/null +++ b/thirdparty/dynet/dynet/globals.cc @@ -0,0 +1,11 @@ +#include "dynet/globals.h" +#include "dynet/devices.h" + +namespace dynet { + +std::mt19937* rndeng = nullptr; +std::vector devices; +Device* default_device = nullptr; +float weight_decay_lambda; + +} diff --git a/thirdparty/dynet/dynet/globals.h b/thirdparty/dynet/dynet/globals.h new file mode 100644 index 000000000..8c7676cd7 --- /dev/null +++ b/thirdparty/dynet/dynet/globals.h @@ -0,0 +1,17 @@ +#ifndef DYNET_GLOBALS_H +#define DYNET_GLOBALS_H + +#include +#include + +namespace dynet { + +class Device; + +extern std::mt19937* rndeng; +extern std::vector devices; +extern Device* default_device; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/gpu-kernels.h b/thirdparty/dynet/dynet/gpu-kernels.h new file mode 100644 index 000000000..9c39c965d --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-kernels.h @@ -0,0 +1,57 @@ +#ifndef DYNET_GPU_KERNELS_H +#define DYNET_GPU_KERNELS_H + +#include "dynet/cuda.h" + +namespace dynet { +namespace gpu { + +template +__global__ void unaryExprKernel(int n, const float* x, float* y, Func func) { + int i = threadIdx.x + blockIdx.x * blockDim.x; + while (i < n) { + y[i] = func(x[i]); + i += gridDim.x * blockDim.x; + } +} + +template +__global__ void accUnaryExprKernel(int n, const float* x, float* y, Func func) { + int i = threadIdx.x + blockIdx.x * blockDim.x; + while (i < n) { + y[i] += func(x[i]); + i += gridDim.x * blockDim.x; + } +} + +template +__global__ void binaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { + int i = threadIdx.x + blockIdx.x * blockDim.x; + while (i < n) { + y[i] = func(x0[i], x1[i]); + i += gridDim.x * blockDim.x; + } +} + +template +__global__ void accBinaryExprKernel(int n, const float* x0, const float* x1, float* y, Func func) { + int i = threadIdx.x + blockIdx.x * 
blockDim.x; + while (i < n) { + y[i] += func(x0[i], x1[i]); + i += gridDim.x * blockDim.x; + } +} + +template +__global__ void slowReduceKernel(int n, const float* x0, const float* x1, float* y, Func func) { + float ty = 0; + // THIS IS BAD - FIX THIS TO MAKE IT FAST + for (int i = 0; i < n; ++i) + ty += func(x0[i], x1[i]); + y[0] = ty; +} + +} // namespace gpu +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/gpu-model.cu b/thirdparty/dynet/dynet/gpu-model.cu new file mode 100644 index 000000000..73638f402 --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-model.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as model.cc but compiled +// on CUDA +#include "model.cc" diff --git a/thirdparty/dynet/dynet/gpu-nodes-contract.cu b/thirdparty/dynet/dynet/gpu-nodes-contract.cu new file mode 100644 index 000000000..14f28e337 --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-nodes-contract.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-contract.cc" diff --git a/thirdparty/dynet/dynet/gpu-nodes-conv.cu b/thirdparty/dynet/dynet/gpu-nodes-conv.cu new file mode 100644 index 000000000..451f71b36 --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-nodes-conv.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-conv.cc but compiled +// on CUDA +#include "nodes-conv.cc" diff --git a/thirdparty/dynet/dynet/gpu-nodes-conv2d.cu b/thirdparty/dynet/dynet/gpu-nodes-conv2d.cu new file mode 100644 index 000000000..347aaadcf --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-nodes-conv2d.cu @@ -0,0 +1 @@ +#include "nodes-conv2d.cc" diff --git a/thirdparty/dynet/dynet/gpu-nodes.cu b/thirdparty/dynet/dynet/gpu-nodes.cu new file mode 100644 index 000000000..0f0efceed --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-nodes.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes.cc" diff 
--git a/thirdparty/dynet/dynet/gpu-ops.cu b/thirdparty/dynet/dynet/gpu-ops.cu new file mode 100644 index 000000000..63d7f63a1 --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-ops.cu @@ -0,0 +1,105 @@ +#include "dynet/cuda.h" +#include "dynet/gpu-ops.h" +#include "dynet/gpu-kernels.h" +#include "dynet/functors.h" + +namespace dynet { +namespace gpu { + +// CUDA kernel. Each thread takes care of one element of c +__global__ void ker_dense_to_sparse_assign(int n, const unsigned int *idx, float *src, float *trg) { + // Get our global thread ID + int id = blockIdx.x*blockDim.x+threadIdx.x; + + // Make sure we do not go out of bounds + if (id < n) + trg[idx[id]] = src[id]; +} + +void dense_to_sparse_assign(int n, const unsigned int *idx, float *src, float *trg) { + if(n > 0) { + auto tb = SizeToBlockThreadPair(n); + int total_size = tb.first*tb.second; + for(int curr_pos = 0; curr_pos < n; curr_pos += total_size) + ker_dense_to_sparse_assign<<>>(min(total_size, n-curr_pos), idx+curr_pos, src+curr_pos, trg); + } +} + +// CUDA kernel. Each thread takes care of one element of c +__global__ void ker_sparse_to_dense_assign(int n, const unsigned int *idx, float *src, float *trg) { + // Get our global thread ID + int id = blockIdx.x*blockDim.x+threadIdx.x; + + // Make sure we do not go out of bounds + if (id < n) + trg[id] = src[idx[id]]; +} + +void sparse_to_dense_assign(int n, const unsigned int *idx, float *src, float *trg) { + if(n > 0) { + auto tb = SizeToBlockThreadPair(n); + int total_size = tb.first*tb.second; + for(int curr_pos = 0; curr_pos < n; curr_pos += total_size) + ker_sparse_to_dense_assign<<>>(min(total_size, n-curr_pos), idx+curr_pos, src, trg+curr_pos); + } +} + +// CUDA kernel. 
Each thread takes care of one element of c +__global__ void ker_dense_to_sparse_subtract(int n, const unsigned int *idx, float *src, float *trg) { + // Get our global thread ID + int id = blockIdx.x*blockDim.x+threadIdx.x; + + // Make sure we do not go out of bounds + if (id < n) + atomicAdd(trg + idx[id], -src[id]); +} + +void dense_to_sparse_subtract(int n, const unsigned int *idx, float *src, float *trg) { + if(n > 0) { + auto tb = SizeToBlockThreadPair(n); + int total_size = tb.first*tb.second; + for(int curr_pos = 0; curr_pos < n; curr_pos += total_size) + ker_dense_to_sparse_subtract<<>>(min(total_size, n-curr_pos), idx+curr_pos, src+curr_pos, trg); + } +} + +// CUDA kernel. Each thread takes care of one element of c +__global__ void ker_sparse_to_dense_block_assign_and_multiply(int n, const unsigned *idx, int bsize, float mult, float* src, float *trg) { + // Get our global thread ID + int id = blockIdx.x*blockDim.x+threadIdx.x; + + // Make sure we do not go out of bounds + if (id < n*bsize) + trg[id] = src[idx[id/bsize]*bsize+id%bsize] * mult; +} + +void sparse_to_dense_block_assign_and_multiply(int n, const unsigned *idx, int bsize, float mult, float *src, float *trg) { + if(n > 0) { + auto tb = SizeToBlockThreadPair(n*bsize); + int total_size = tb.first*tb.second; + for(int curr_pos = 0; curr_pos < n; curr_pos += total_size/bsize) + ker_sparse_to_dense_block_assign_and_multiply<<>>(min(total_size/bsize, n-curr_pos), idx+curr_pos, bsize, mult, src, trg+curr_pos*bsize); + } +} + +// CUDA kernel. 
Each thread takes care of one element of c +__global__ void ker_dense_to_sparse_block_add(int n, const unsigned *idx, int bsize, float* src, float *trg) { + // Get our global thread ID + int id = blockIdx.x*blockDim.x+threadIdx.x; + + // Make sure we do not go out of bounds + if (id < n*bsize) + atomicAdd(trg + idx[id/bsize]*bsize+id%bsize, src[id]); +} + +void dense_to_sparse_block_add(int n, const unsigned *idx, int bsize, float *src, float *trg) { + if(n > 0) { + auto tb = SizeToBlockThreadPair(n*bsize); + int total_size = tb.first*tb.second; + for(int curr_pos = 0; curr_pos < n; curr_pos += total_size/bsize) + ker_dense_to_sparse_block_add<<>>(min(total_size/bsize, n-curr_pos), idx+curr_pos, bsize, src+curr_pos*bsize, trg); + } +} + +} // namespace gpu +} // namespace dynet diff --git a/thirdparty/dynet/dynet/gpu-ops.h b/thirdparty/dynet/dynet/gpu-ops.h new file mode 100644 index 000000000..a429f5686 --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-ops.h @@ -0,0 +1,16 @@ +#ifndef DYNET_GPU_OPS_H +#define DYNET_GPU_OPS_H + +namespace dynet { +namespace gpu { + +void dense_to_sparse_assign(int n, const unsigned int* ids, float* src, float* trg); +void sparse_to_dense_assign(int n, const unsigned int* ids, float* src, float* trg); +void dense_to_sparse_subtract(int n, const unsigned int* ids, float* src, float* trg); +void sparse_to_dense_block_assign_and_multiply(int n, const unsigned *idx, int bsize, float mult, float *src, float *trg); +void dense_to_sparse_block_add(int n, const unsigned* ids, int bsize, float* src, float* trg); + +} // namespace gpu +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/gpu-param-nodes.cu b/thirdparty/dynet/dynet/gpu-param-nodes.cu new file mode 100644 index 000000000..09a640f22 --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-param-nodes.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as param-nodes.cc but compiled +// on CUDA +#include "param-nodes.cc" diff --git 
a/thirdparty/dynet/dynet/gpu-tensor.cu b/thirdparty/dynet/dynet/gpu-tensor.cu new file mode 100644 index 000000000..de1a38dcf --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-tensor.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as tensor.cc but compiled +// on CUDA +#include "tensor.cc" diff --git a/thirdparty/dynet/dynet/gpu-training.cu b/thirdparty/dynet/dynet/gpu-training.cu new file mode 100644 index 000000000..7ad69abcb --- /dev/null +++ b/thirdparty/dynet/dynet/gpu-training.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as training.cc but compiled +// on CUDA +#include "training.cc" diff --git a/thirdparty/dynet/dynet/grad-check.cc b/thirdparty/dynet/dynet/grad-check.cc new file mode 100644 index 000000000..63991505d --- /dev/null +++ b/thirdparty/dynet/dynet/grad-check.cc @@ -0,0 +1,100 @@ +#include "dynet/grad-check.h" + +#include +#include + +#include "dynet/model.h" +#include "dynet/dynet.h" +#include "dynet/tensor.h" +#include "dynet/expr.h" + +using namespace std; + +namespace dynet { + +bool check_grad(Model& m, expr::Expression& expr, int verbosity) { + ComputationGraph& g = *expr.pg; + // Clear the parameters first + const vector& params = m.parameters_list(); + const vector& lookup_params = m.lookup_parameters_list(); + for (auto pp : params) + pp->clear(); + for (auto pp : lookup_params) + pp->clear(); + + // Perform forward and backward steps + float alpha = 5e-4; + g.forward(expr); + g.backward(expr); + + // Check + bool flag = false, curr_flag = false; + for (auto pp : params) { + if(verbosity > 1) + cerr << endl << "PARAMETERS " << pp << endl; + ParameterStorage& p = *pp; + if(p.g.v == nullptr) continue; + size_t ts = p.dim.size(); + for (size_t i = 0; i < ts; ++i) { + float old = TensorTools::access_element(p.values, i); + TensorTools::set_element(p.values, i, old - alpha); + float E_left = as_scalar(g.forward(expr)); + TensorTools::set_element(p.values, i, old + alpha); + float E_right = 
as_scalar(g.forward(expr)); + TensorTools::set_element(p.values, i, old); + float g = (E_right - E_left) / (2 * alpha); + float g_act = TensorTools::access_element(p.g, i); + float f = fabs(g - g_act); + float m = std::max(fabs(g), fabs(g_act)); + if (f > 0.01 && m > 0.f) f /= m; + if (f > 0.01 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } + if(verbosity + (curr_flag ? 1 : 0) > 1) { + cerr << g_act << ' ' << g << endl; + curr_flag = false; + } + } + } + + for (auto pp : lookup_params) { + if(verbosity > 1) + cerr << endl << "LOOKUP PARAMETERS " << pp << endl; + LookupParameterStorage& p = *pp; + size_t ts = p.dim.size(); + for (unsigned j : p.non_zero_grads) { + if(verbosity > 1) + cerr << "OBJECT=" << j << endl; + Tensor& v = p.values[j]; + Tensor& ag = p.grads[j]; + for (size_t i = 0; i < ts; ++i) { + float old = TensorTools::access_element(v, i); + TensorTools::set_element(v, i, old - alpha); + float E_left = as_scalar(g.forward(expr)); + TensorTools::set_element(v, i, old + alpha); + float E_right = as_scalar(g.forward(expr)); + TensorTools::set_element(v, i, old); + float g = (E_right - E_left) / (2 * alpha); + float g_act = TensorTools::access_element(ag, i); + float f = fabs(g - g_act); + float m = std::max(fabs(g), fabs(g_act)); + if (f > 0.01 && m > 0.f) f /= m; + if (f > 0.01 || std::isnan(f)) { flag = true; if(verbosity > 0) { curr_flag = true; cerr << "***[" << f << "] "; } } + if(verbosity + (curr_flag ? 
1 : 0) > 1) { + cerr << g_act << ' ' << g << endl; + curr_flag = false; + } + } + } + } + + if (flag) { + if (verbosity > 1) + cerr << endl << "*** GRADIENT CHECK FAILED ***" << endl; + } else { + if (verbosity > 0) + cerr << endl << "GRADIENT CHECK PASSED" << endl; + } + return !flag; +} + +} + diff --git a/thirdparty/dynet/dynet/grad-check.h b/thirdparty/dynet/dynet/grad-check.h new file mode 100644 index 000000000..517a778cf --- /dev/null +++ b/thirdparty/dynet/dynet/grad-check.h @@ -0,0 +1,16 @@ +#ifndef DYNET_GRAD_CHECK_H +#define DYNET_GRAD_CHECK_H + +#include "dynet/expr.h" + +namespace dynet { + +class Model; +struct ComputationGraph; + +// verbosity is zero for silence, one for only printing errors, two for everything +bool check_grad(Model& m, expr::Expression& expr, int verbosity = 1); + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/graph.cc b/thirdparty/dynet/dynet/graph.cc new file mode 100644 index 000000000..b0921bc98 --- /dev/null +++ b/thirdparty/dynet/dynet/graph.cc @@ -0,0 +1,34 @@ +#include "dynet/graph.h" +#include "dynet/dynet.h" +#include +#include "dynet/dynet-helper.h" + +using namespace std; + +namespace dynet { + +void graph_optimize(ComputationGraph* cg) { + // topo sort + vector& nodes = cg->nodes; + vector longest_paths(nodes.size()); + for (unsigned i = 0; i < nodes.size(); ++i) { + auto& v = *nodes[i]; // vertex v_i + auto& lp = longest_paths[i]; // distance to v_i + for (auto e : v.args) { + int weight = 0; + if (v.args.size() == 7) weight = 1; + int pte = longest_paths[e] + weight; + if (pte > lp) lp = pte; + } + } + for (unsigned i = 0; i < nodes.size(); ++i) { + vector x; + for (auto e : nodes[i]->args) { + x.push_back(string("x") + to_string(e)); + } + cerr << "LONGEST PATH: " << longest_paths[i] << "\tx" << i << " = " << nodes[i]->as_string(x) << endl; + } + throw std::runtime_error("Failure in graph optimization");// DEBUGGING +} + +} // namespaiice dynet diff --git a/thirdparty/dynet/dynet/graph.h 
b/thirdparty/dynet/dynet/graph.h new file mode 100644 index 000000000..bcbbf8f62 --- /dev/null +++ b/thirdparty/dynet/dynet/graph.h @@ -0,0 +1,9 @@ +#ifndef DYNET_GRAPH_H +#define DYNET_GRAPH_H + +namespace dynet { +struct ComputationGraph; +void graph_optimize(ComputationGraph* cg); +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/gru.cc b/thirdparty/dynet/dynet/gru.cc new file mode 100644 index 000000000..9feef64bd --- /dev/null +++ b/thirdparty/dynet/dynet/gru.cc @@ -0,0 +1,158 @@ +#include "dynet/gru.h" + +#include +#include +#include + +#include "dynet/nodes.h" +#include "dynet/training.h" + +using namespace std; + +namespace dynet { + +enum { X2Z, H2Z, BZ, X2R, H2R, BR, X2H, H2H, BH }; + +GRUBuilder::GRUBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) : hidden_dim(hidden_dim), layers(layers) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + // z + Parameter p_x2z = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2z = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bz = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // r + Parameter p_x2r = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2r = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_br = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // h + Parameter p_x2h = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2h = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bh = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = {p_x2z, p_h2z, p_bz, p_x2r, p_h2r, p_br, p_x2h, p_h2h, p_bh}; + params.push_back(ps); + } // layers + dropout_rate = 0.f; +} + +void GRUBuilder::new_graph_impl(ComputationGraph& cg) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = 
params[i]; + + // z + Expression x2z = parameter(cg, p[X2Z]); + Expression h2z = parameter(cg, p[H2Z]); + Expression bz = parameter(cg, p[BZ]); + + // r + Expression x2r = parameter(cg, p[X2R]); + Expression h2r = parameter(cg, p[H2R]); + Expression br = parameter(cg, p[BR]); + + // h + Expression x2h = parameter(cg, p[X2H]); + Expression h2h = parameter(cg, p[H2H]); + Expression bh = parameter(cg, p[BH]); + + vector vars = {x2z, h2z, bz, x2r, h2r, br, x2h, h2h, bh}; + param_vars.push_back(vars); + } +} + +void GRUBuilder::start_new_sequence_impl(const std::vector& h_0) { + h.clear(); + h0 = h_0; + DYNET_ARG_CHECK(h0.empty() || h0.size() == layers, + "Number of inputs passed to initialize GRUBuilder (" << h0.size() << ") " + "is not equal to the number of layers (" << layers << ")"); +} + +Expression GRUBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "Number of inputs passed to RNNBuilder::set_h() (" << h_new.size() << ") " + "is not equal to the number of layers (" << layers << ")"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + h[t][i] = h_i; + } + return h[t].back(); +} +// Current implementation : s_new is either {new_c[0],...,new_c[n]} +// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} +Expression GRUBuilder::set_s_impl(int prev, const std::vector& s_new) { + return set_h_impl(prev, s_new); +} + +Expression GRUBuilder::add_input_impl(int prev, const Expression& x) { + //if(dropout_rate != 0.f) + //throw std::runtime_error("GRUBuilder doesn't support dropout yet"); + const bool has_initial_state = (h0.size() > 0); + h.push_back(vector(layers)); + vector& ht = h.back(); + Expression in = x; + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + Expression h_tprev; + // prev_zero means that h_tprev should be treated as 0 + bool prev_zero = false; + if (prev >= 0 || 
has_initial_state) { + h_tprev = (prev < 0) ? h0[i] : h[prev][i]; + } else { prev_zero = true; } + if (dropout_rate) in = dropout(in, dropout_rate); + // update gate + Expression zt; + if (prev_zero) + zt = affine_transform({vars[BZ], vars[X2Z], in}); + else + zt = affine_transform({vars[BZ], vars[X2Z], in, vars[H2Z], h_tprev}); + zt = logistic(zt); + // forget + Expression ft = 1.f - zt; + // reset gate + Expression rt; + if (prev_zero) + rt = affine_transform({vars[BR], vars[X2R], in}); + else + rt = affine_transform({vars[BR], vars[X2R], in, vars[H2R], h_tprev}); + rt = logistic(rt); + + // candidate activation + Expression ct; + if (prev_zero) { + ct = affine_transform({vars[BH], vars[X2H], in}); + ct = tanh(ct); + Expression nwt = cmult(zt, ct); + in = ht[i] = nwt; + } else { + Expression ght = cmult(rt, h_tprev); + ct = affine_transform({vars[BH], vars[X2H], in, vars[H2H], ght}); + ct = tanh(ct); + Expression nwt = cmult(zt, ct); + Expression crt = cmult(ft, h_tprev); + in = ht[i] = crt + nwt; + } + } + if (dropout_rate) return dropout(ht.back(), dropout_rate); + else return ht.back(); +} + +void GRUBuilder::copy(const RNNBuilder & rnn) { + const GRUBuilder & rnn_gru = (const GRUBuilder&)rnn; + if(params.size() != rnn_gru.params.size()) + DYNET_INVALID_ARG("Attempt to copy between two GRUBuilders that are not the same size"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_gru.params[i][j]; +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/gru.h b/thirdparty/dynet/dynet/gru.h new file mode 100644 index 000000000..efa01255c --- /dev/null +++ b/thirdparty/dynet/dynet/gru.h @@ -0,0 +1,52 @@ +#ifndef DYNET_GRU_H_ +#define DYNET_GRU_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" + +namespace dynet { + +class Model; + +struct GRUBuilder : public RNNBuilder { + GRUBuilder() = default; + explicit GRUBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& 
model); + Expression back() const override { return (cur == -1? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { return final_h(); } + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { return get_h(i); } + unsigned num_h0_components() const override { return layers; } + void copy(const RNNBuilder & params) override; + + // first index is layer, then ... + std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + + protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + + // first index is time, second is layer + std::vector> h; + + // initial values of h at each layer + // - default to zero matrix input + std::vector h0; + + unsigned hidden_dim; + unsigned layers; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/hsm-builder.cc b/thirdparty/dynet/dynet/hsm-builder.cc new file mode 100644 index 000000000..64d000056 --- /dev/null +++ b/thirdparty/dynet/dynet/hsm-builder.cc @@ -0,0 +1,300 @@ +#include "dynet/hsm-builder.h" + +#include +#include +#include + +using namespace std; + +namespace dynet { + +using namespace expr; + +Cluster::Cluster() : initialized(false) {} +void Cluster::new_graph(ComputationGraph& cg) { + for (Cluster* child : children) { + child->new_graph(cg); + } + bias.pg = NULL; + weights.pg = NULL; +} + +Cluster* Cluster::add_child(unsigned sym) { + auto it = word2ind.find(sym); + unsigned i; + if (it == word2ind.end()) { + Cluster* c = new Cluster(); + c->rep_dim = rep_dim; + c->path = path; + c->path.push_back(sym); + i = 
children.size(); + word2ind.insert(make_pair(sym, i)); + children.push_back(c); + } + else { + i = it->second; + } + return children[i]; +} + +void Cluster::add_word(unsigned word) { + word2ind[word] = terminals.size(); + terminals.push_back(word); +} + +void Cluster::initialize(unsigned rep_dim, Model& model) { + this->rep_dim = rep_dim; + initialize(model); +} + +void Cluster::initialize(Model& model) { + output_size = (children.size() > 0) ? children.size() : terminals.size(); + + if (output_size == 1) { + } + else if (output_size == 2) { + p_weights = model.add_parameters({1, rep_dim}); + p_bias = model.add_parameters({1}, ParameterInitConst(0.f)); + } + else { + p_weights = model.add_parameters({output_size, rep_dim}); + p_bias = model.add_parameters({output_size}, ParameterInitConst(0.f)); + } + + for (Cluster* child : children) { + child->initialize(model); + } +} + +unsigned Cluster::num_children() const { + return children.size(); +} + +const Cluster* Cluster::get_child(unsigned i) const { + return children[i]; +} + +const vector& Cluster::get_path() const { return path; } +unsigned Cluster::get_index(unsigned word) const { return word2ind.find(word)->second; } +unsigned Cluster::get_word(unsigned index) const { return terminals[index]; } + +Expression Cluster::predict(Expression h, ComputationGraph& cg) const { + if (output_size == 1) { + return input(cg, 1.0f); + } + else { + Expression b = get_bias(cg); + Expression w = get_weights(cg); + return affine_transform({b, w, h}); + } +} + +Expression Cluster::neg_log_softmax(Expression h, unsigned r, ComputationGraph& cg) const { + if (output_size == 1) { + return input(cg, 0.0f); + } + else if (output_size == 2) { + Expression p = logistic(predict(h, cg)); + if (r == 1) { + p = 1 - p; + } + return -log(p); + } + else { + Expression dist = predict(h, cg); + return pickneglogsoftmax(dist, r); + } +} + +unsigned Cluster::sample(expr::Expression h, ComputationGraph& cg) const { + if (output_size == 1) { + return 
0; + } + else if (output_size == 2) { + expr::Expression prob0_expr = logistic(predict(h, cg)); + double prob0 = as_scalar(cg.incremental_forward(prob0_expr)); + double p = rand01(); + if (p < prob0) { + return 0; + } + else { + return 1; + } + } + else { + expr::Expression dist_expr = softmax(predict(h, cg)); + vector dist = as_vector(cg.incremental_forward(dist_expr)); + unsigned c = 0; + double p = rand01(); + for (; c < dist.size(); ++c) { + p -= dist[c]; + if (p < 0.0) { break; } + } + if (c == dist.size()) { + --c; + } + return c; + } +} + +Expression Cluster::get_weights(ComputationGraph& cg) const { + if (weights.pg != &cg) { + weights = parameter(cg, p_weights); + } + return weights; +} + +Expression Cluster::get_bias(ComputationGraph& cg) const { + if (bias.pg != &cg) { + bias = parameter(cg, p_bias); + } + return bias; +} + +string Cluster::toString() const { + stringstream ss; + for (unsigned i = 0; i < path.size(); ++i) { + if (i != 0) { + ss << " "; + } + ss << path[i]; + } + return ss.str(); +} + +#if BOOST_VERSION >= 105600 + DYNET_SERIALIZE_COMMIT(Cluster, DYNET_SERIALIZE_DEFINE(rep_dim, children, path, terminals, word2ind)) +#else + template + void Cluster::serialize(Archive& ar, const unsigned int) { + DYNET_RUNTIME_ERR("Serializing clusters is only supported on versions of boost 1.56 or higher"); + } +#endif +DYNET_SERIALIZE_IMPL(Cluster) + +HierarchicalSoftmaxBuilder::HierarchicalSoftmaxBuilder(unsigned rep_dim, + const std::string& cluster_file, + Dict& word_dict, + Model& model) { + root = read_cluster_file(cluster_file, word_dict); + root->initialize(rep_dim, model); +} + +HierarchicalSoftmaxBuilder::~HierarchicalSoftmaxBuilder() { +} + +void HierarchicalSoftmaxBuilder::initialize(Model& model) { + root->initialize(model); +} + +void HierarchicalSoftmaxBuilder::new_graph(ComputationGraph& cg) { + pcg = &cg; + root->new_graph(cg); +} + +Expression HierarchicalSoftmaxBuilder::neg_log_softmax(const Expression& rep, unsigned wordidx) { + if(pcg 
!= NULL) + DYNET_INVALID_ARG("In HierarchicalSoftmaxBuilder, you must call new_graph before calling neg_log_softmax!"); + Cluster* path = widx2path[wordidx]; + + unsigned i = 0; + const Cluster* node = root; + DYNET_ASSERT(root != NULL, "Null root in HierarchicalSoftmaxBuilder"); + vector log_probs; + Expression lp; + unsigned r; + while (node->num_children() > 0) { + r = node->get_index(path->get_path()[i]); + lp = node->neg_log_softmax(rep, r, *pcg); + log_probs.push_back(lp); + node = node->get_child(r); + DYNET_ASSERT(node != NULL, "Null node in HierarchicalSoftmaxBuilder"); + i += 1; + } + + r = path->get_index(wordidx); + lp = node->neg_log_softmax(rep, r, *pcg); + log_probs.push_back(lp); + + return sum(log_probs); +} + +unsigned HierarchicalSoftmaxBuilder::sample(const expr::Expression& rep) { + if(pcg != NULL) + DYNET_INVALID_ARG("In HierarchicalSoftmaxBuilder, you must call new_graph before calling sample!"); + + const Cluster* node = root; + vector dist; + unsigned c; + while (node->num_children() > 0) { + c = node->sample(rep, *pcg); + node = node->get_child(c); + } + + c = node->sample(rep, *pcg); + return node->get_word(c); +} + +Expression HierarchicalSoftmaxBuilder::full_log_distribution(const Expression& rep) { + DYNET_RUNTIME_ERR("full_distribution not implemented for HierarchicalSoftmaxBuilder"); + return dynet::expr::Expression(); +} + +inline bool is_ws(char x) { return (x == ' ' || x == '\t'); } +inline bool not_ws(char x) { return (x != ' ' && x != '\t'); } + +Cluster* HierarchicalSoftmaxBuilder::read_cluster_file(const std::string& cluster_file, Dict& word_dict) { + cerr << "Reading clusters from " << cluster_file << " ...\n"; + ifstream in(cluster_file); + if(!in) + DYNET_INVALID_ARG("HierarchicalSoftmaxBuilder couldn't read clusters from " << cluster_file); + int wc = 0; + string line; + vector path; + Cluster* root = new Cluster(); + while(getline(in, line)) { + path.clear(); + ++wc; + const unsigned len = line.size(); + unsigned startp = 
0; + unsigned endp = 0; + while (startp < len) { + while (is_ws(line[startp]) && startp < len) { ++startp; } + endp = startp; + while (not_ws(line[endp]) && endp < len) { ++endp; } + string symbol = line.substr(startp, endp - startp); + path.push_back(path_symbols.convert(symbol)); + if (line[endp] == ' ') { + startp = endp + 1; + continue; + } + else { + break; + } + } + Cluster* node = root; + for (unsigned symbol : path) { + node = node->add_child(symbol); + } + + unsigned startw = endp; + while (is_ws(line[startw]) && startw < len) { ++startw; } + unsigned endw = startw; + while (not_ws(line[endw]) && endw < len) { ++endw; } + if(endp <= startp || startw <= endp || endw <= startw) + DYNET_INVALID_ARG("File formatting error in HierarchicalSoftmaxBuilder"); + + string word = line.substr(startw, endw - startw); + unsigned widx = word_dict.convert(word); + node->add_word(widx); + + if (widx2path.size() <= widx) { + widx2path.resize(widx + 1); + } + widx2path[widx] = node; + } + cerr << "Done reading clusters.\n"; + return root; +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/hsm-builder.h b/thirdparty/dynet/dynet/hsm-builder.h new file mode 100644 index 000000000..b04e61d69 --- /dev/null +++ b/thirdparty/dynet/dynet/hsm-builder.h @@ -0,0 +1,88 @@ +#ifndef DYNET_HSMBUILDER_H +#define DYNET_HSMBUILDER_H + +#include +#include +#include +#include "dynet/dynet.h" +#include "dynet/expr.h" +#include "dynet/dict.h" +#include "dynet/cfsm-builder.h" +#include "dynet/io-macros.h" + +namespace dynet { + +class Cluster { +private: + std::vector children; + std::vector path; + std::vector terminals; + std::unordered_map word2ind; + Parameter p_weights; + Parameter p_bias; + mutable expr::Expression weights; + mutable expr::Expression bias; + bool initialized; + unsigned rep_dim; + unsigned output_size; + + expr::Expression predict(expr::Expression h, ComputationGraph& cg) const; + DYNET_SERIALIZE_DECLARE() + +public: + Cluster(); + Cluster* add_child(unsigned 
sym); + void add_word(unsigned word); + void initialize(Model& model); + void initialize(unsigned rep_dim, Model& model); + + void new_graph(ComputationGraph& cg); + unsigned sample(expr::Expression h, ComputationGraph& cg) const; + expr::Expression neg_log_softmax(expr::Expression h, unsigned r, ComputationGraph& cg) const; + + unsigned get_index(unsigned word) const; + unsigned get_word(unsigned index) const; + unsigned num_children() const; + const Cluster* get_child(unsigned i) const; + const std::vector& get_path() const; + expr::Expression get_weights(ComputationGraph& cg) const; + expr::Expression get_bias(ComputationGraph& cg) const; + + std::string toString() const; +}; + +// helps with implementation of hierarchical softmax +// read a file with lines of the following format +// CLASSID word [freq] +class HierarchicalSoftmaxBuilder : public SoftmaxBuilder { + public: + HierarchicalSoftmaxBuilder(unsigned rep_dim, + const std::string& cluster_file, + Dict& word_dict, + Model& model); + ~HierarchicalSoftmaxBuilder(); + + void initialize(Model& model); + + // call this once per ComputationGraph + void new_graph(ComputationGraph& cg); + + // -log(p(c | rep) * p(w | c, rep)) + expr::Expression neg_log_softmax(const expr::Expression& rep, unsigned wordidx); + + // samples a word from p(w,c | rep) + unsigned sample(const expr::Expression& rep); + + expr::Expression full_log_distribution(const expr::Expression& rep); + + private: + Cluster* read_cluster_file(const std::string& cluster_file, Dict& word_dict); + std::vector widx2path; // will be NULL if not found + Dict path_symbols; + + ComputationGraph* pcg; + Cluster* root; +}; +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/init.cc b/thirdparty/dynet/dynet/init.cc new file mode 100644 index 000000000..74ba5f4ec --- /dev/null +++ b/thirdparty/dynet/dynet/init.cc @@ -0,0 +1,214 @@ +#include "dynet/init.h" +#include "dynet/aligned-mem-pool.h" +#include "dynet/dynet.h" +#include 
"dynet/weight-decay.h" +#include "dynet/globals.h" + +#include +#include +#include + +#if HAVE_CUDA +#include "dynet/cuda.h" +#include +#endif + +using namespace std; + +namespace dynet { + +DynetParams::DynetParams() : random_seed(0), mem_descriptor("512"), weight_decay(0), + shared_parameters(false) +#if HAVE_CUDA + , ngpus_requested(false), ids_requested(false), requested_gpus(-1) +#endif +{ +#if HAVE_CUDA + gpu_mask = std::vector(MAX_GPUS, 0); +#endif +} + +DynetParams::~DynetParams() +{ +} + +static void remove_args(int& argc, char**& argv, int& argi, int n) { + for (int i = argi + n; i < argc; ++i) + argv[i - n] = argv[i]; + argc -= n; + DYNET_ASSERT(argc >= 0, "remove_args less than 0"); +} + +DynetParams extract_dynet_params(int& argc, char**& argv, bool shared_parameters) { + DynetParams params; + params.shared_parameters = shared_parameters; + + int argi = 1; + +#if HAVE_CUDA + params.gpu_mask = std::vector(MAX_GPUS, 0); +#endif + + + while (argi < argc) { + string arg = argv[argi]; + + // Memory + if (arg == "--dynet-mem" || arg == "--dynet_mem") { + if ((argi + 1) > argc) { + throw std::invalid_argument("[dynet] --dynet-mem expects an argument (the memory, in megabytes, to reserve)"); + } else { + params.mem_descriptor = argv[argi + 1]; + remove_args(argc, argv, argi, 2); + } + } + + // Weight decay + else if (arg == "--dynet-weight-decay" || arg == "--dynet_weight_decay") { + if ((argi + 1) > argc) { + throw std::invalid_argument("[dynet] --dynet-weight-decay requires an argument (the weight decay per update)"); + } else { + string a2 = argv[argi + 1]; + istringstream d(a2); d >> params.weight_decay; + remove_args(argc, argv, argi, 2); + } + } + + // Random seed + else if (arg == "--dynet-seed" || arg == "--dynet_seed") { + if ((argi + 1) > argc) { + throw std::invalid_argument("[dynet] --dynet-seed expects an argument (the random number seed)"); + } else { + string a2 = argv[argi + 1]; + istringstream c(a2); c >> params.random_seed; + 
remove_args(argc, argv, argi, 2); + } + } + +#if HAVE_CUDA + // Number of GPUs + else if (arg == "--dynet_gpus" || arg == "--dynet-gpus") { + if ((argi + 1) > argc) { + throw std::invalid_argument("[dynet] --dynet-gpus expects an argument (number of GPUs to use)"); + } else { + if (params.ngpus_requested) + throw std::invalid_argument("Multiple instances of --dynet-gpus"); + params.ngpus_requested = true; + string a2 = argv[argi + 1]; + istringstream c(a2); c >> params.requested_gpus; + remove_args(argc, argv, argi, 2); + } + } + + // GPU ids + else if (arg == "--dynet_gpu_ids" || arg == "--dynet-gpu-ids") { + if ((argi + 1) > argc) { + throw std::invalid_argument("[dynet] --dynet-gpu-ids expects an argument (comma separated list of physical GPU ids to use)"); + } else { + string a2 = argv[argi + 1]; + if (params.ids_requested) + throw std::invalid_argument("Multiple instances of --dynet-gpu-ids"); + params.ids_requested = true; + if (a2.size() % 2 != 1) { + ostringstream oss; oss << "Bad argument to --dynet-gpu-ids: " << a2; throw std::invalid_argument(oss.str()); + } + for (unsigned i = 0; i < a2.size(); ++i) { + if ((i % 2 == 0 && (a2[i] < '0' || a2[i] > '9')) || + (i % 2 == 1 && a2[i] != ',')) { + ostringstream oss; oss << "Bad argument to --dynet-gpu-ids: " << a2; + throw std::invalid_argument(oss.str()); + } + if (i % 2 == 0) { + int gpu_id = a2[i] - '0'; + if (gpu_id >= MAX_GPUS) { throw std::runtime_error("DyNet hard limit on maximum number of GPUs (MAX_GPUS) exceeded. 
If you need more, modify the code to raise this hard limit."); } + params.gpu_mask[gpu_id]++; + params.requested_gpus++; + if (params.gpu_mask[gpu_id] != 1) { + ostringstream oss; oss << "Bad argument to --dynet-gpu-ids: " << a2; + throw std::invalid_argument(oss.str()); + } + } + } + remove_args(argc, argv, argi, 2); + } + } +#endif + + // Go to next argument + else { + argi++; + } + + } + +#if HAVE_CUDA + // Check for conflict between the two ways of requesting GPUs + if (params.ids_requested && params.ngpus_requested) + throw std::invalid_argument("Use only --dynet_gpus or --dynet_gpu_ids, not both\n"); +#endif + + return params; +} + +void initialize(DynetParams& params) { + if (default_device != nullptr) { + cerr << "WARNING: Attempting to initialize dynet twice. Ignoring duplicate initialization." << endl; + return; + } + + // initialize CUDA + vector gpudevices; +#if HAVE_CUDA + cerr << "[dynet] initializing CUDA\n"; + gpudevices = initialize_gpu(params); +#endif + + // Set random seed + if (params.random_seed == 0) { + random_device rd; + params.random_seed = rd(); + } + cerr << "[dynet] random seed: " << params.random_seed << endl; + rndeng = new mt19937(params.random_seed); + + // Set weight decay rate + if (params.weight_decay < 0 || params.weight_decay >= 1) + throw std::invalid_argument("[dynet] weight decay parameter must be between 0 and 1 (probably very small like 1e-6)\n"); + weight_decay_lambda = params.weight_decay; + + // Allocate memory + cerr << "[dynet] allocating memory: " << params.mem_descriptor << "MB\n"; + // TODO: Once multi-device support is added, we will potentially allocate both CPU + // and GPU, not either-or + int default_index = 0; + if (gpudevices.size() > 0) { + for (auto gpu : gpudevices) + devices.push_back(gpu); + } else { + devices.push_back(new Device_CPU(devices.size(), params.mem_descriptor, params.shared_parameters)); + } + default_device = devices[default_index]; + + // TODO these should be accessed through the 
relevant device and removed here + kSCALAR_MINUSONE = default_device->kSCALAR_MINUSONE; + kSCALAR_ONE = default_device->kSCALAR_ONE; + kSCALAR_ZERO = default_device->kSCALAR_ZERO; + cerr << "[dynet] memory allocation done.\n"; + +} + +void initialize(int& argc, char**& argv, bool shared_parameters) { + DynetParams params = extract_dynet_params(argc, argv, shared_parameters); + initialize(params); +} + +void cleanup() { + delete rndeng; + // TODO: Devices cannot be deleted at the moment + // for(Device* device : devices) delete device; + devices.clear(); + default_device = nullptr; +} + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/init.h b/thirdparty/dynet/dynet/init.h new file mode 100644 index 000000000..4e8b57a03 --- /dev/null +++ b/thirdparty/dynet/dynet/init.h @@ -0,0 +1,37 @@ +#ifndef DYNET_EIGEN_INIT_H +#define DYNET_EIGEN_INIT_H + +#include +#include + +namespace dynet { + +extern float weight_decay_lambda; + +/** + * \brief Represents general parameters for dynet + * + */ +struct DynetParams { + DynetParams(); + ~DynetParams(); + unsigned random_seed = 0; /**< The seed for random number generation */ + std::string mem_descriptor = "512"; /**< Total memory to be allocated for Dynet */ + float weight_decay = 0; /**< Weight decay rate for L2 regularization */ + bool shared_parameters = false; /**< TO DOCUMENT */ + bool ngpus_requested = false; /**< GPUs requested by number */ + bool ids_requested = false; /**< GPUs requested by ids */ + int requested_gpus = -1; /**< Number of requested GPUs */ + std::vector gpu_mask; /**< List of required GPUs by ids */ + + +}; + +DynetParams extract_dynet_params(int& argc, char**& argv, bool shared_parameters = false); +void initialize(DynetParams& params); +void initialize(int& argc, char**& argv, bool shared_parameters = false); +void cleanup(); + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/io-macros.h b/thirdparty/dynet/dynet/io-macros.h new file mode 100644 index 000000000..b4932dc0d 
--- /dev/null +++ b/thirdparty/dynet/dynet/io-macros.h @@ -0,0 +1,165 @@ +#ifndef DYNET_IO_MACROS__ +#define DYNET_IO_MACROS__ + +#include +#include +#include +#include + +#if BOOST_VERSION >= 105600 +#include +#endif + +#ifndef __CUDACC__ +#include +#include +#include +#include +#include +#endif + +#define MAX_SERIALIZE_VERSION 1024 + +#define DYNET_SERIALIZE_IMPL(MyClass) \ + template void MyClass::serialize(boost::archive::text_oarchive &ar, const unsigned int); \ + template void MyClass::serialize(boost::archive::text_iarchive &ar, const unsigned int); \ + template void MyClass::serialize(boost::archive::binary_oarchive &ar, const unsigned int); \ + template void MyClass::serialize(boost::archive::binary_iarchive &ar, const unsigned int); + +#define DYNET_SAVELOAD_IMPL(MyClass) \ + template void MyClass::save(boost::archive::text_oarchive &ar, const unsigned int) const; \ + template void MyClass::load(boost::archive::text_iarchive &ar, const unsigned int); \ + template void MyClass::save(boost::archive::binary_oarchive &ar, const unsigned int) const; \ + template void MyClass::load(boost::archive::binary_iarchive &ar, const unsigned int); + +#ifdef _MSC_VER +// for BOOST_PP_REPEAT usage, wrap the parameters with PP_NARG +#define DYNET_PP_FOREACH_ARRAY( ... ) ( BOOST_PP_VARIADIC_SIZE(__VA_ARGS__) , ( __VA_ARGS__ ) ) + +// apply A to all following parameters +#define DYNET_PP_FOREACH( A, ... ) BOOST_PP_REPEAT(BOOST_PP_VARIADIC_SIZE(__VA_ARGS__), A, DYNET_PP_FOREACH_ARRAY(__VA_ARGS__) ) +#else +#define DYNET_PP_NARG_(x64, x63, x62, x61, x60, x59, x58, x57, x56, x55, x54, x53, x52, x51, x50, x49, x48, x47, x46, x45, x44, x43, x42, x41, x40, x39, x38, x37, x36, x35, x34, x33, x32, x31, x30, x29, x28, x27, x26, x25, x24, x23, x22, x21, x20, x19, x18, x17, x16, x15, x14, x13, x12, x11, x10, x9, x8, x7, x6, x5, x4, x3, x2, x1, n, ...) n +// currently only support max 64 number of parameters +#define DYNET_PP_NARG(...) 
DYNET_PP_NARG_(__VA_ARGS__, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1) + +// for BOOST_PP_REPEAT usage, wrap the parameters with PP_NARG +#define DYNET_PP_FOREACH_ARRAY( ... ) ( DYNET_PP_NARG(__VA_ARGS__) , ( __VA_ARGS__ ) ) + +// apply A to all following parameters +#define DYNET_PP_FOREACH( A, ... ) BOOST_PP_REPEAT(DYNET_PP_NARG(__VA_ARGS__), A, DYNET_PP_FOREACH_ARRAY(__VA_ARGS__) ) +#endif + +#define DYNET_ARCHIVE(z, n, data) \ + ar & BOOST_PP_ARRAY_ELEM(n, data); + +#define DYNET_UNFOLD(z, n, data) \ + BOOST_PP_ARRAY_ELEM(n, data); + +#define DYNET_FUNCTOR(z, n, data) \ + data; + +// declare INTERFACE +#define DYNET_SERIALIZE_DECLARE() \ + friend class boost::serialization::access; \ + template \ + void serialize(Archive &ar, const unsigned int); + +// split declare INTERFACE +#define DYNET_SERIALIZE_SPLIT_DECLARE() \ + friend class boost::serialization::access; \ + template \ + void save(Archive & ar, const unsigned int) const; \ + template \ + void load(Archive & ar, const unsigned int); \ + BOOST_SERIALIZATION_SPLIT_MEMBER() + +// INTERFACE: empty serialization definition macro +#define DYNET_SERIALIZE_COMMIT_EMPTY(...) \ + friend class boost::serialization::access; \ + template \ + void serialize(Archive & ar, const unsigned int version) {} + +// INTERFACE: commit serialization operation macro +#define DYNET_SERIALIZE_COMMIT(MyClass, ...) \ + template \ + void MyClass::serialize(Archive & ar, const unsigned int version) { \ + DYNET_PP_FOREACH(DYNET_UNFOLD, __VA_ARGS__) \ + } + +// INTERFACE: commit serialization save operation macro +#define DYNET_SERIALIZE_SAVE_COMMIT(MyClass, ...) 
\ + template \ + void MyClass::save(Archive & ar, const unsigned int version) const { \ + DYNET_PP_FOREACH(DYNET_UNFOLD, __VA_ARGS__) \ + } + +// INTERFACE: commit serialization load operation macro +#define DYNET_SERIALIZE_LOAD_COMMIT(MyClass, FUNC, ...) \ + template \ + void MyClass::load(Archive & ar, const unsigned int version) { \ + DYNET_PP_FOREACH(DYNET_UNFOLD, __VA_ARGS__) \ + FUNC; \ + } + +// INTERFACE: specify serialize version macro +#define DYNET_VERSION_DEFINE(T, VERSION) BOOST_CLASS_VERSION(T, VERSION) + +// INTERFACE: serialize definition macro +#define DYNET_SERIALIZE_DEFINE(...) \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) + +// INTERFACE: serialize definition macro for derived class +#define DYNET_SERIALIZE_DERIVED_DEFINE(T, ...) \ + ar & boost::serialization::base_object(*this); \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) + +// INTERFACE: serialize definition macro for derived class which is equal to base class +#define DYNET_SERIALIZE_DERIVED_EQ_DEFINE(T) \ + ar & boost::serialization::base_object(*this); + +#ifdef _MSC_VER + +#define DYNET_VERSION_SERIALIZE_DEFINE(l, r, ...) \ + if (l >= 0 && r >= 0 && l < r && version >= l && version < r) { \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) \ + } + +#define DYNET_VERSION_SERIALIZE_DERIVED_DEFINE(T, l, r, ...) \ + if (l >= 0 && r >= 0 && l < r && version >= l && version < r) { \ + ar & boost::serialization::base_object(*this); \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) \ + } }) + +#else +// INTERFACE: serialize definition with version macro, l <= version < r +#define DYNET_VERSION_SERIALIZE_DEFINE(l, r, ...) \ + !(l >= 0) ? (void)0 : \ + !(r >= 0) ? (void)0 : \ + !(l < r) ? (void)0 : \ + ({ if (version >= l && version < r) { \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) \ + } }) + +// INTERFACE: serialize definition with version macro for derived class, l <= version < r +#define DYNET_VERSION_SERIALIZE_DERIVED_DEFINE(T, l, r, ...) \ + !(l >= 0) ? (void)0 : \ + !(r >= 0) ? 
(void)0 : \ + !(l < r) ? (void)0 : \ + ({ if (version >= l && version < r) { \ + ar & boost::serialization::base_object(*this); \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) \ + } }) +#endif + +// INTERFACE: serialize definition macro for non-intrusive impl +#define DYNET_NINTRUSIVE_SERIALIZE_DEFINE(param, ...) \ +namespace boost { \ +namespace serialization { \ + template \ + void serialize(Archive & ar, param, const unsigned int) { \ + DYNET_PP_FOREACH(DYNET_ARCHIVE, __VA_ARGS__) \ + } \ +} /* namespace serialization */ } /* namespace boost */ + +#endif diff --git a/thirdparty/dynet/dynet/lstm.cc b/thirdparty/dynet/dynet/lstm.cc new file mode 100644 index 000000000..a0097a7aa --- /dev/null +++ b/thirdparty/dynet/dynet/lstm.cc @@ -0,0 +1,620 @@ +#include "dynet/lstm.h" + +#include +#include +#include +#include + +#include "dynet/nodes.h" + +using namespace std; +using namespace dynet::expr; + +namespace dynet { + +enum { X2I, H2I, C2I, BI, X2O, H2O, C2O, BO, X2C, H2C, BC }; + +LSTMBuilder::LSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) : layers(layers), input_dim(input_dim), hid(hidden_dim) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // o + Parameter p_x2o = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2o = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_c2o = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bo = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // c + Parameter p_x2c = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2c = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bc 
= model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc}; + params.push_back(ps); + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; + dropout_rate_c = 0.f; +} + +void LSTMBuilder::new_graph_impl(ComputationGraph& cg) { + param_vars.clear(); + + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + + //i + Expression i_x2i = parameter(cg, p[X2I]); + Expression i_h2i = parameter(cg, p[H2I]); + Expression i_c2i = parameter(cg, p[C2I]); + Expression i_bi = parameter(cg, p[BI]); + //o + Expression i_x2o = parameter(cg, p[X2O]); + Expression i_h2o = parameter(cg, p[H2O]); + Expression i_c2o = parameter(cg, p[C2O]); + Expression i_bo = parameter(cg, p[BO]); + //c + Expression i_x2c = parameter(cg, p[X2C]); + Expression i_h2c = parameter(cg, p[H2C]); + Expression i_bc = parameter(cg, p[BC]); + + vector vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc}; + param_vars.push_back(vars); + } + _cg = &cg; +} + +// layout: 0..layers = c +// layers+1..2*layers = h +void LSTMBuilder::start_new_sequence_impl(const vector& hinit) { + // Check input dim and hidden dim + if (input_dim != params[0][X2I].dim()[1]) { + cerr << "Warning : LSTMBuilder input dimension " << input_dim + << " doesn't match with parameter dimension " << params[0][X2I].dim()[1] + << ". Setting input_dim to " << params[0][X2I].dim()[1] << endl; + input_dim = params[0][X2I].dim()[1]; + } + if (hid != params[0][X2I].dim()[0]) { + cerr << "Warning : LSTMBuilder hidden dimension " << hid + << " doesn't match with parameter dimension " << params[0][X2I].dim()[0] + << ". 
Setting hid to " << params[0][X2I].dim()[0] << endl; + hid = params[0][X2I].dim()[0]; + } + + h.clear(); + c.clear(); + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "LSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state and cell for each layer). However, for " << layers << " layers, " + << hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } + // Init dropout masks + set_dropout_masks(); +} + +void LSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float retention_rate_c = 1.f - dropout_rate_c; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + float scale_c = 1.f / retention_rate_c; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim}, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid}, batch_size), retention_rate_h, scale_h)); + // c + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid}, batch_size), retention_rate_c, scale_c)); + masks.push_back(masks_i); + } + } +} +// TO DO - Make this correct +// Copied c from the previous step (otherwise c.size()< h.size()) +// Also is creating a new step something we want? +// wouldn't overwriting the current one be better? 
+Expression LSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "LSTMBuilder::set_h expects as many inputs as layers, but got " << h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); +} +// Current implementation : s_new is either {new_c[0],...,new_c[n]} +// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} +Expression LSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "LSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = c.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); +} + +Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < 0) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + } else { // t > 0 + i_h_tm1 = h[prev][i]; + i_c_tm1 = c[prev][i]; + } + + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + // x + if (dropout_rate > 0.f) { + in = cmult(in, masks[i][0]); + + } + // h + if (has_prev_state && dropout_rate_h > 0.f) + i_h_tm1 = cmult(i_h_tm1, masks[i][1]); + // For c, create another variable since we still need full i_c_tm1 for the componentwise mult + Expression i_dropped_c_tm1; + if (has_prev_state) { + i_dropped_c_tm1 = i_c_tm1; + if (dropout_rate_c > 0.f) + i_dropped_c_tm1 = cmult(i_dropped_c_tm1, masks[i][2]); + } + + // input + Expression i_ait; + if (has_prev_state) + i_ait = affine_transform({vars[BI], vars[X2I], in, vars[H2I], i_h_tm1, vars[C2I], i_dropped_c_tm1}); + else + i_ait = affine_transform({vars[BI], vars[X2I], in}); + Expression i_it = logistic(i_ait); + // forget + Expression i_ft = 1.f - i_it; + // write memory cell + Expression i_awt; + if (has_prev_state) + i_awt = affine_transform({vars[BC], vars[X2C], in, vars[H2C], i_h_tm1}); + else + i_awt = affine_transform({vars[BC], vars[X2C], in}); + Expression i_wt = tanh(i_awt); + // output + if (has_prev_state) { + Expression i_nwt = cmult(i_it, i_wt); + Expression i_crt = cmult(i_ft, i_c_tm1); + ct[i] = i_crt + i_nwt; + } else { + ct[i] 
= cmult(i_it, i_wt); + } + + Expression i_aot; + // Drop c. Uses the same mask as c_tm1. is this justified? + Expression dropped_c = ct[i]; + if (dropout_rate_c > 0.f) + dropped_c = cmult(dropped_c, masks[i][2]); + if (has_prev_state) + i_aot = affine_transform({vars[BO], vars[X2O], in, vars[H2O], i_h_tm1, vars[C2O], dropped_c}); + else + i_aot = affine_transform({vars[BO], vars[X2O], in, vars[C2O], dropped_c}); + Expression i_ot = logistic(i_aot); + Expression ph_t = tanh(ct[i]); + in = ht[i] = cmult(i_ot, ph_t); + } + return ht.back(); +} + +void LSTMBuilder::copy(const RNNBuilder & rnn) { + const LSTMBuilder & rnn_lstm = (const LSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy LSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; +} + +void LSTMBuilder::save_parameters_pretraining(const string& fname) const { + cerr << "Writing LSTM parameters to " << fname << endl; + ofstream of(fname); + if (!of) + DYNET_INVALID_ARG("Couldn't write LSTM parameters to " << fname); + boost::archive::binary_oarchive oa(of); + std::string id = "LSTMBuilder:params"; + oa << id; + oa << layers; + for (unsigned i = 0; i < layers; ++i) { + for (auto p : params[i]) { + oa << p.get()->values; + } + } +} + +void LSTMBuilder::load_parameters_pretraining(const string& fname) { + cerr << "Loading LSTM parameters from " << fname << endl; + ifstream of(fname); + if (!of) + DYNET_INVALID_ARG("Couldn't read LSTM parameters from " << fname); + boost::archive::binary_iarchive ia(of); + std::string id; + ia >> id; + if (id != "LSTMBuilder:params") + DYNET_INVALID_ARG("Bad id read in LSTMBuilder::load_parameters_pretraining. 
Invalid model format?"); + unsigned l = 0; + ia >> l; + if (l != layers) + DYNET_INVALID_ARG("Bad number of layers in LSTMBuilder::load_parameters_pretraining. Invalid model format?"); + // TODO check other dimensions + for (unsigned i = 0; i < layers; ++i) { + for (auto p : params[i]) { + ia >> p.get()->values; + } + } +} + +void LSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; + dropout_rate_c = d; +} + +void LSTMBuilder::set_dropout(float d, float d_h, float d_c) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f && d_c >= 0.f && d_c <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; + dropout_rate_c = d_c; +} + +void LSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; + dropout_rate_c = 0.f; +} + +DYNET_SERIALIZE_COMMIT(LSTMBuilder, + DYNET_SERIALIZE_DERIVED_DEFINE(RNNBuilder, params, layers, dropout_rate), + DYNET_VERSION_SERIALIZE_DEFINE(1, MAX_SERIALIZE_VERSION, dropout_rate_h, dropout_rate_c, input_dim, hid)) + +DYNET_SERIALIZE_IMPL(LSTMBuilder); + +// Vanilla LSTM + +//enum { _X2I, _H2I, _C2I, _BI, _X2F, _H2F, _C2F, _BF, _X2O, _H2O, _C2O, _BO, _X2G, _H2G, _C2G, _BG }; +enum { _X2I, _H2I, _BI, _X2F, _H2F, _BF, _X2O, _H2O, _BO, _X2G, _H2G, _BG }; +enum { LN_GH, LN_BH, LN_GX, LN_BX, LN_GC, LN_BC}; + +VanillaLSTMBuilder::VanillaLSTMBuilder() : has_initial_state(false), layers(0), input_dim(0), hid(0), dropout_rate_h(0), ln_lstm(false) { } + +VanillaLSTMBuilder::VanillaLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model, + bool ln_lstm) : layers(layers), input_dim(input_dim), hid(hidden_dim), ln_lstm(ln_lstm) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + // [i; f; o; g] + Parameter p_x2i = model.add_parameters({hidden_dim * 4, layer_input_dim}); + Parameter p_h2i = 
model.add_parameters({hidden_dim * 4, hidden_dim}); + //Parameter p_c2i = model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_bi = model.add_parameters({hidden_dim * 4}, ParameterInitConst(0.f)); + + + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = {p_x2i, p_h2i, /*p_c2i,*/ p_bi}; + params.push_back(ps); + + if (ln_lstm){ + Parameter p_gh = model.add_parameters({hidden_dim * 4}, ParameterInitConst(1.f)); + Parameter p_bh = model.add_parameters({hidden_dim * 4}, ParameterInitConst(0.f)); + Parameter p_gx = model.add_parameters({hidden_dim * 4}, ParameterInitConst(1.f)); + Parameter p_bx = model.add_parameters({hidden_dim * 4}, ParameterInitConst(0.f)); + Parameter p_gc = model.add_parameters({hidden_dim}, ParameterInitConst(1.f)); + Parameter p_bc = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + vector ln_ps = {p_gh, p_bh, p_gx, p_bx, p_gc, p_bc}; + ln_params.push_back(ln_ps); + } + } // layers + dropout_rate = 0.f; + dropout_rate_h = 0.f; +} + +void VanillaLSTMBuilder::new_graph_impl(ComputationGraph& cg) { + param_vars.clear(); + if (ln_lstm)ln_param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + auto& p = params[i]; + vector vars; + for (unsigned j = 0; j < p.size(); ++j) { vars.push_back(parameter(cg, p[j])); } + param_vars.push_back(vars); + if (ln_lstm){ + auto& ln_p = ln_params[i]; + vector ln_vars; + for (unsigned j = 0; j < ln_p.size(); ++j) { ln_vars.push_back(parameter(cg, ln_p[j])); } + ln_param_vars.push_back(ln_vars); + } + } + + _cg = &cg; +} +// layout: 0..layers = c +// layers+1..2*layers = h +void VanillaLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers * 2 == hinit.size(), + "VanillaLSTMBuilder must be initialized with 2 times as many expressions as layers " + "(hidden state, and cell for each layer). 
However, for " << layers << " layers, " << + hinit.size() << " expressions were passed in"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } + + // Init droupout masks + set_dropout_masks(); +} + +void VanillaLSTMBuilder::set_dropout_masks(unsigned batch_size) { + masks.clear(); + for (unsigned i = 0; i < layers; ++i) { + std::vector masks_i; + unsigned idim = (i == 0) ? input_dim : hid; + if (dropout_rate > 0.f) { + float retention_rate = 1.f - dropout_rate; + float retention_rate_h = 1.f - dropout_rate_h; + float scale = 1.f / retention_rate; + float scale_h = 1.f / retention_rate_h; + // in + masks_i.push_back(random_bernoulli(*_cg, Dim({ idim}, batch_size), retention_rate, scale)); + // h + masks_i.push_back(random_bernoulli(*_cg, Dim({ hid}, batch_size), retention_rate_h, scale_h)); + masks.push_back(masks_i); + } + } +} + + +// TODO - Make this correct +// Copied c from the previous step (otherwise c.size()< h.size()) +// Also is creating a new step something we want? +// wouldn't overwriting the current one be better? 
+Expression VanillaLSTMBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.empty() || h_new.size() == layers, + "VanillaLSTMBuilder::set_h expects as many inputs as layers, but got " << + h_new.size() << " inputs for " << layers << " layers"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = h_new[i]; + Expression c_i = c[t - 1][i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); +} +// Current implementation : s_new is either {new_c[0],...,new_c[n]} +// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]} +Expression VanillaLSTMBuilder::set_s_impl(int prev, const std::vector& s_new) { + DYNET_ARG_CHECK(s_new.size() == layers || s_new.size() == 2 * layers, + "VanillaLSTMBuilder::set_s expects either as many inputs or twice as many inputs as layers, but got " << s_new.size() << " inputs for " << layers << " layers"); + bool only_c = s_new.size() == layers; + const unsigned t = c.size(); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression h_i = only_c ? 
h[t - 1][i] : s_new[i + layers]; + Expression c_i = s_new[i]; + h[t][i] = h_i; + c[t][i] = c_i; + } + return h[t].back(); +} + +Expression VanillaLSTMBuilder::add_input_impl(int prev, const Expression& x) { + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + Expression in = x; + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + const vector& ln_vars = ln_param_vars[i]; + Expression i_h_tm1, i_c_tm1; + bool has_prev_state = (prev >= 0 || has_initial_state); + if (prev < 0) { + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_tm1 = h0[i]; + i_c_tm1 = c0[i]; + } + } else { // t > 0 + i_h_tm1 = h[prev][i]; + i_c_tm1 = c[prev][i]; + } + // apply dropout according to https://arxiv.org/abs/1512.05287 (tied weights) + if (dropout_rate > 0.f) { + in = cmult(in, masks[i][0]); + + } + if (has_prev_state && dropout_rate_h > 0.f) + i_h_tm1 = cmult(i_h_tm1, masks[i][1]); + // input + Expression tmp; + Expression i_ait; + Expression i_aft; + Expression i_aot; + Expression i_agt; + if (ln_lstm){ + if (has_prev_state) + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]) + layer_norm(vars[_H2I] * i_h_tm1, ln_vars[LN_GH], ln_vars[LN_BH]); + else + tmp = vars[_BI] + layer_norm(vars[_X2I] * in, ln_vars[LN_GX], ln_vars[LN_BX]); + }else{ + if (has_prev_state) + tmp = affine_transform({vars[_BI], vars[_X2I], in, vars[_H2I], i_h_tm1}); + else + tmp = affine_transform({vars[_BI], vars[_X2I], in}); + } + i_ait = pick_range(tmp, 0, hid); + i_aft = pick_range(tmp, hid, hid * 2); + i_aot = pick_range(tmp, hid * 2, hid * 3); + i_agt = pick_range(tmp, hid * 3, hid * 4); + Expression i_it = logistic(i_ait); + // TODO(odashi): Should the forget bias be a hyperparameter? 
+ Expression i_ft = logistic(i_aft + 1.f); + Expression i_ot = logistic(i_aot); + Expression i_gt = tanh(i_agt); + + ct[i] = has_prev_state ? (cmult(i_ft, i_c_tm1) + cmult(i_it, i_gt)) : cmult(i_it, i_gt); + if (ln_lstm) + in = ht[i] = cmult(i_ot, tanh(layer_norm(ct[i],ln_vars[LN_GC],ln_vars[LN_BC]))); + else + in = ht[i] = cmult(i_ot, tanh(ct[i])); + } + return ht.back(); +} + +void VanillaLSTMBuilder::copy(const RNNBuilder & rnn) { + const VanillaLSTMBuilder & rnn_lstm = (const VanillaLSTMBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_lstm.params.size(), + "Attempt to copy VanillaLSTMBuilder with different number of parameters " + "(" << params.size() << " != " << rnn_lstm.params.size() << ")"); + for (size_t i = 0; i < params.size(); ++i) + for (size_t j = 0; j < params[i].size(); ++j) + params[i][j] = rnn_lstm.params[i][j]; + for (size_t i = 0; i < ln_params.size(); ++i) + for (size_t j = 0; j < ln_params[i].size(); ++j) + ln_params[i][j] = rnn_lstm.ln_params[i][j]; +} + +void VanillaLSTMBuilder::save_parameters_pretraining(const string& fname) const { + cerr << "Writing VanillaLSTM parameters to " << fname << endl; + ofstream of(fname); + if (!of) + DYNET_INVALID_ARG("Couldn't write LSTM parameters to " << fname); + boost::archive::binary_oarchive oa(of); + std::string id = "VanillaLSTMBuilder:params"; + oa << id; + oa << layers; + for (unsigned i = 0; i < layers; ++i) { + for (auto p : params[i]) { + oa << p.get()->values; + } + for (auto p : ln_params[i]) { + oa << p.get()->values; + } + } +} + +void VanillaLSTMBuilder::load_parameters_pretraining(const string& fname) { + cerr << "Loading VanillaLSTM parameters from " << fname << endl; + ifstream of(fname); + if (!of) + DYNET_INVALID_ARG("Couldn't read LSTM parameters from " << fname); + boost::archive::binary_iarchive ia(of); + std::string id; + ia >> id; + if (id != "VanillaLSTMBuilder:params") + DYNET_INVALID_ARG("Bad id read in VanillaLSTMBuilder::load_parameters_pretraining. 
Bad model format?"); + unsigned l = 0; + ia >> l; + if (l != layers) + DYNET_INVALID_ARG("Bad number of layers in VanillaLSTMBuilder::load_parameters_pretraining. Bad model format?"); + // TODO check other dimensions + for (unsigned i = 0; i < layers; ++i) { + for (auto p : params[i]) { + ia >> p.get()->values; + } + for (auto p : ln_params[i]) { + ia >> p.get()->values; + } + } +} + +void VanillaLSTMBuilder::set_dropout(float d) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d; +} + +void VanillaLSTMBuilder::set_dropout(float d, float d_h) { + DYNET_ARG_CHECK(d >= 0.f && d <= 1.f && d_h >= 0.f && d_h <= 1.f, + "dropout rate must be a probability (>=0 and <=1)"); + dropout_rate = d; + dropout_rate_h = d_h; +} + +void VanillaLSTMBuilder::disable_dropout() { + dropout_rate = 0.f; + dropout_rate_h = 0.f; +} + +DYNET_SERIALIZE_COMMIT(VanillaLSTMBuilder, + DYNET_SERIALIZE_DERIVED_DEFINE(RNNBuilder, params, layers, dropout_rate, dropout_rate_h, hid, input_dim), + DYNET_VERSION_SERIALIZE_DEFINE(1, MAX_SERIALIZE_VERSION, ln_params, ln_lstm)) +DYNET_SERIALIZE_IMPL(VanillaLSTMBuilder); + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/lstm.h b/thirdparty/dynet/dynet/lstm.h new file mode 100644 index 000000000..03ca42d0e --- /dev/null +++ b/thirdparty/dynet/dynet/lstm.h @@ -0,0 +1,319 @@ +/** + * \file lstm.h + * \brief Helper structures to build recurrent units + * + * \details TODO: Create documentation and explain rnns, etc... + */ +#ifndef DYNET_LSTM_H_ +#define DYNET_LSTM_H_ + +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" +#include + + +using namespace dynet::expr; + +namespace dynet { + +class Model; +/** + * \ingroup rnnbuilders + * \brief LSTMBuilder creates an LSTM unit with coupled input and forget gate as well as peepholes connections. 
+ * + * \details More specifically, here are the equations for the dynamics of this cell : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}x_t+W_{ih}h_{t-1}+W_{ic}c_{t-1}+b_i)\\ + \tilde{c_t} & = \tanh(W_{cx}x_t+W_{ch}h_{t-1}+b_c)\\ + c_t & = c_{t-1}\circ (1-i_t) + \tilde{c_t}\circ i_t\\ + & = c_{t-1} + (\tilde{c_t}-c_{t-1})\circ i_t\\ + o_t & = \sigma(W_{ox}x_t+W_{oh}h_{t-1}+W_{oc}c_{t}+b_o)\\ + h_t & = \tanh(c_t)\circ o_t\\ +\end{split} +\f$ + */ +struct LSTMBuilder : public RNNBuilder { + /** + * \brief Default constructor + */ + LSTMBuilder() = default; + /** + * \brief Constructor for the LSTMBuilder + * + * \param layers Number of layers + * \param input_dim Dimension of the input \f$x_t\f$ + * \param hidden_dim Dimension of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model Model holding the parameters + */ + explicit LSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + /** + * @brief Number of components in `h_0` + * @details For `LSTMBuilder`, this corresponds to `2 * layers` because it includes the initial cell state \f$c_0\f$ + * @return `2 * layers` + */ + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + + /** + * @brief Get the state of the hidden layer at RNN pointer `i` + * @details For `LSTMBuilder`, this consists of a vector of the memory cell values for each layer (l1, l2, l3), + * followed by the hidden state values + * @return {c_{l1}, c_{l2}, ..., h_{l1}, h_{l2}, ...} + */ + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? 
c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + void save_parameters_pretraining(const std::string& fname) const override; + void load_parameters_pretraining(const std::string& fname) override; + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h,d_c)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h,c\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is an adaptation of the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$,\f$\mathbf{z_c}\sim \mathrm{Bernoulli}(1-d_c)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x} {\mathbf{z_x}} \circ x_t)+W_{ih}(\frac 1 {1-d_h} {\mathbf{z_h}} \circ h_{t-1})+W_{ic}(\frac 1 {1-d_c} {\mathbf{z_c}} \circ c_{t-1})+b_i)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x} {\mathbf{z_x}} \circ x_t)+W_{ch}(\frac 1 {1-d_h} {\mathbf{z_h}} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ (1-i_t) + \tilde{c_t}\circ i_t\\ + & = c_{t-1} + (\tilde{c_t}-c_{t-1})\circ i_t\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x} {\mathbf{z_x}} \circ x_t)+W_{oh}(\frac 1 {1-d_h} {\mathbf{z_h}} \circ h_{t-1})+W_{oc}(\frac 1 {1-d_c} {\mathbf{z_c}} \circ c_{t})+b_o)\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_h Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + * \param d_c Dropout rate \f$d_c\f$ for the cell \f$c_t\f$ + */ + void set_dropout(float d, float d_h, float d_c); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); +protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + +public: + // first index is layer, then ... 
+ std::vector> params; + + // first index is layer, then ... + std::vector> param_vars; + + // first index is layer, then ... + // masks for Gal dropout + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim = 0; + unsigned hid = 0; + + float dropout_rate_h = 0.f, dropout_rate_c = 0.f; + +private: + DYNET_SERIALIZE_DECLARE() + ComputationGraph *_cg; + +}; + + +/** + * \ingroup rnnbuilders + * @brief VanillaLSTMBuilder creates a "standard" LSTM, i.e. with decoupled input and forget gates and no peephole connections + * @details This cell runs according to the following dynamics : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}x_t+W_{ih}h_{t-1}+b_i)\\ + f_t & = \sigma(W_{fx}x_t+W_{fh}h_{t-1}+b_f+1)\\ + o_t & = \sigma(W_{ox}x_t+W_{oh}h_{t-1}+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}x_t+W_{ch}h_{t-1}+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + */ +struct VanillaLSTMBuilder : public RNNBuilder { + /** + * @brief Default Constructor + */ + VanillaLSTMBuilder(); + /** + * \brief Constructor for the VanillaLSTMBuilder + * + * \param layers Number of layers + * \param input_dim Dimension of the input \f$x_t\f$ + * \param hidden_dim Dimension of the hidden states \f$h_t\f$ and \f$c_t\f$ + * \param model Model holding the parameters + * \param ln_lstm Whether to use layer normalization + */ + explicit VanillaLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model, + bool ln_lstm = false); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? 
h0 : h.back()); } + std::vector final_s() const override { + std::vector ret = (c.size() == 0 ? c0 : c.back()); + for (auto my_h : final_h()) ret.push_back(my_h); + return ret; + } + unsigned num_h0_components() const override { return 2 * layers; } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { + std::vector ret = (i == -1 ? c0 : c[i]); + for (auto my_h : get_h(i)) ret.push_back(my_h); + return ret; + } + + void copy(const RNNBuilder & params) override; + + void save_parameters_pretraining(const std::string& fname) const override; + void load_parameters_pretraining(const std::string& fname) override; + /** + * \brief Set the dropout rates to a unique value + * \details This has the same effect as `set_dropout(d,d_h)` except that all the dropout rates are set to the same value. + * \param d Dropout rate to be applied on all of \f$x,h\f$ + */ + void set_dropout(float d); + /** + * \brief Set the dropout rates + * \details The dropout implemented here is the variational dropout with tied weights introduced in [Gal, 2016](http://papers.nips.cc/paper/6241-a-theoretically-grounded-application-of-dropout-in-recurrent-neural-networks) + * More specifically, dropout masks \f$\mathbf{z_x}\sim \mathrm{Bernoulli}(1-d_x)\f$,\f$\mathbf{z_h}\sim \mathrm{Bernoulli}(1-d_h)\f$ are sampled at the start of each sequence. 
+ * The dynamics of the cell are then modified to : + * + * \f$ + * \begin{split} + i_t & =\sigma(W_{ix}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ih}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_i)\\ + f_t & = \sigma(W_{fx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{fh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_f+1)\\ + o_t & = \sigma(W_{ox}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{oh}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_o)\\ + \tilde{c_t} & = \tanh(W_{cx}(\frac 1 {1-d_x}\mathbf{z_x} \circ x_t)+W_{ch}(\frac 1 {1-d_h}\mathbf{z_h} \circ h_{t-1})+b_c)\\ + c_t & = c_{t-1}\circ f_t + \tilde{c_t}\circ i_t\\ + h_t & = \tanh(c_t)\circ o_t\\ + \end{split} + * \f$ + * + * For more detail as to why scaling is applied, see the "Unorthodox" section of the documentation + * \param d Dropout rate \f$d_x\f$ for the input \f$x_t\f$ + * \param d_r Dropout rate \f$d_h\f$ for the output \f$h_t\f$ + */ + void set_dropout(float d, float d_r); + /** + * \brief Set all dropout rates to 0 + * \details This is equivalent to `set_dropout(0)` or `set_dropout(0,0)` + * + */ + void disable_dropout(); + /** + * \brief Set dropout masks at the beginning of a sequence for a specific batch size + * \details If this function is not called on batched input, the same mask will be applied across + * all batch elements. Use this to apply different masks to each batch element + * + * \param batch_size Batch size + */ + void set_dropout_masks(unsigned batch_size = 1); +protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override; + +public: + // first index is layer, then ... + std::vector> params; + // first index is layer, then ... + std::vector> ln_params; + + // first index is layer, then ... 
+ std::vector> param_vars; + // first index is layer, then ... + std::vector> ln_param_vars; + + // first index is layer, then ... + std::vector> masks; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned input_dim, hid; + float dropout_rate_h; + bool ln_lstm; + + + +private: + DYNET_SERIALIZE_DECLARE() + ComputationGraph* _cg; // Pointer to current cg + +}; + +} // namespace dynet + + +// Class version +DYNET_VERSION_DEFINE(dynet::LSTMBuilder, 1); +// Class version +DYNET_VERSION_DEFINE(dynet::VanillaLSTMBuilder, 1); + + +#endif diff --git a/thirdparty/dynet/dynet/mem.cc b/thirdparty/dynet/dynet/mem.cc new file mode 100644 index 000000000..b3f067ed9 --- /dev/null +++ b/thirdparty/dynet/dynet/mem.cc @@ -0,0 +1,90 @@ +#include "dynet/mem.h" + +#include +#include +#include +#if !_WINDOWS +#include +#include +#endif + +#include +#if !_WINDOWS +#include +#endif +#include "dynet/except.h" +#if HAVE_CUDA +#include "dynet/cuda.h" +#include +#include +#endif + +using namespace std; + +namespace dynet { + +MemAllocator::~MemAllocator() {} + +void* CPUAllocator::malloc(size_t n) { + void* ptr = _mm_malloc(n, align); + if (!ptr) { + cerr << "CPU memory allocation failed n=" << n << " align=" << align << endl; + throw dynet::out_of_memory("CPU memory allocation failed"); + } + return ptr; +} + +void CPUAllocator::free(void* mem) { + _mm_free(mem); +} + +void CPUAllocator::zero(void* p, size_t n) { + memset(p, 0, n); +} + +void* SharedAllocator::malloc(size_t n) { +#if _WINDOWS + cerr << "Shared memory not supported in Windows" << endl; + throw dynet::out_of_memory("Shared memory allocation failed"); +#else + void* ptr = mmap(NULL, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_SHARED, -1, 0); + if (!ptr) { + cerr << "Shared memory allocation failed n=" << 
n << endl; + throw dynet::out_of_memory("Shared memory allocation failed"); + } + return ptr; +#endif +} + +void SharedAllocator::free(void* mem) { +// munmap(mem, n); +} + +void SharedAllocator::zero(void* p, size_t n) { + memset(p, 0, n); +} + +#if HAVE_CUDA +void* GPUAllocator::malloc(size_t n) { + void* ptr = nullptr; + CUDA_CHECK(cudaSetDevice(devid)); + CUDA_CHECK(cudaMalloc(&ptr, n)); + if (!ptr) { + cerr << "GPU memory allocation failed n=" << n << endl; + throw dynet::out_of_memory("GPU memory allocation failed"); + } + return ptr; +} + +void GPUAllocator::free(void* mem) { + CUDA_CHECK(cudaFree(mem)); +} + +void GPUAllocator::zero(void* p, size_t n) { + CUDA_CHECK(cudaSetDevice(devid)); + CUDA_CHECK(cudaMemsetAsync(p, 0, n)); +} + +#endif + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/mem.h b/thirdparty/dynet/dynet/mem.h new file mode 100644 index 000000000..c8a0d70fb --- /dev/null +++ b/thirdparty/dynet/dynet/mem.h @@ -0,0 +1,52 @@ +#ifndef DYNET_MEM_H +#define DYNET_MEM_H + +#include + +namespace dynet { + +// allocates memory from the device (CPU, GPU) +// only used to create the memory pools +// creates alignment appropriate for that device +struct MemAllocator { + explicit MemAllocator(int align) : align(align) {} + MemAllocator(const MemAllocator&) = delete; + MemAllocator& operator=(const MemAllocator&) = delete; + virtual ~MemAllocator(); + virtual void* malloc(std::size_t n) = 0; + virtual void free(void* mem) = 0; + virtual void zero(void* p, std::size_t n) = 0; + inline std::size_t round_up_align(std::size_t n) const { + if (align < 2) return n; + return ((n + align - 1) / align) * align; + } + const int align; +}; + +struct CPUAllocator : public MemAllocator { + CPUAllocator() : MemAllocator(32) {} + void* malloc(std::size_t n) override; + void free(void* mem) override; + void zero(void* p, std::size_t n) override; +}; + +struct SharedAllocator : public MemAllocator { + SharedAllocator() : MemAllocator(32) {} + void* 
malloc(std::size_t n) override; + void free(void* mem) override; + void zero(void* p, std::size_t n) override; +}; + +#if HAVE_CUDA +struct GPUAllocator : public MemAllocator { + explicit GPUAllocator(int devid) : MemAllocator(256), devid(devid) {} + void* malloc(std::size_t n) override; + void free(void* mem) override; + void zero(void* p, std::size_t n) override; + const int devid; +}; +#endif + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/model.cc b/thirdparty/dynet/dynet/model.cc new file mode 100644 index 000000000..df79def4d --- /dev/null +++ b/thirdparty/dynet/dynet/model.cc @@ -0,0 +1,740 @@ +#include "dynet/model.h" +#include "dynet/tensor.h" +#include "dynet/aligned-mem-pool.h" +#include "dynet/dynet.h" + +#include +#include + +#include +#include + + +#include + +#define LOAD_INIT_FUNC() initialize_lookups() + +#ifdef __CUDACC__ +#include "dynet/gpu-ops.h" +#endif + +// Macros for defining functions over parameters +// NOTE: This only works on the default device, as parameters are currently defined over default devices +#ifdef __CUDACC__ +#define DYNET_PARAMNORM_INST_DEV_IMPL(MyParam, regular_func, dev_func) \ + template void MyParam::dev_func(Device_GPU & dev, float *sqnorm) const; +#elif defined(HAVE_CUDA) +#define DYNET_PARAMNORM_INST_DEV_IMPL(MyParam, regular_func, dev_func) \ + extern template void MyParam::dev_func(Device_GPU & dev, float *sqnorm) const; \ + template void MyParam::dev_func(Device_CPU & dev, float *sqnorm) const; \ + void MyParam::regular_func(float *sqnorm) const { \ + if(default_device->type == DeviceType::CPU) { dev_func(*(Device_CPU*)default_device,sqnorm); } \ + else if(default_device->type == DeviceType::GPU) { dev_func(*(Device_GPU*)default_device,sqnorm); } \ + else { throw std::runtime_error("Invalid device type in MyParam::dev_func"); } \ + } +#else +#define DYNET_PARAMNORM_INST_DEV_IMPL(MyParam, regular_func, dev_func) \ + template void MyParam::dev_func(Device_CPU & dev, float *sqnorm) const; \ + 
void MyParam::regular_func(float *sqnorm) const { \ + if(default_device->type == DeviceType::CPU) { dev_func(*(Device_CPU*)default_device,sqnorm); } \ + else { throw std::runtime_error("Invalid device type in MyParam::dev_func"); } \ + } +#endif + +using namespace std; + +namespace dynet { + +// CPU only functions +#ifndef __CUDACC__ + +ParameterStorageBase::~ParameterStorageBase() {} +DYNET_SERIALIZE_IMPL(ParameterStorageBase) + +ParameterStorage::ParameterStorage(const Dim& d, float scale) : dim(d) { + values.d = g.d = d; + values.device = g.device = default_device; + default_device->allocate_tensor(DeviceMempool::PS, values); + default_device->allocate_tensor(DeviceMempool::PS, g); + TensorTools::zero(g); + if (scale == 0.0f) { + ParameterInitGlorot init; + init.initialize_params(values); + } else { + ParameterInitUniform init(scale); + init.initialize_params(values); + } +} + +ParameterStorage::ParameterStorage(const Dim& d, const ParameterInit & init) : dim(d) { + values.d = g.d = d; + values.device = g.device = default_device; + default_device->allocate_tensor(DeviceMempool::PS, values); + default_device->allocate_tensor(DeviceMempool::PS, g); + TensorTools::zero(g); + init.initialize_params(values); +} + +size_t ParameterStorage::size() const { return dim.size(); } + +void ParameterStorage::zero() { + TensorTools::zero(values); + clear(); +} + +void ParameterStorage::copy(const ParameterStorage & param) { + DYNET_ARG_CHECK(dim == param.dim, + "Attempt to copy between parameters with mismatched dimensions: " << dim << " != " << param.dim); + TensorTools::copy_elements(values, param.values); +} + +void ParameterStorage::clear() { + if (g.v != nullptr) + TensorTools::zero(g); +} + +void ParameterStorage::clip(float left, float right) { + TensorTools::clip(values, left, right); +} + +#ifndef __CUDACC__ +DYNET_SERIALIZE_COMMIT(ParameterStorage, + DYNET_SERIALIZE_DERIVED_DEFINE(ParameterStorageBase, dim, values, g)) +DYNET_SERIALIZE_IMPL(ParameterStorage) +#endif 
+ +LookupParameterStorage::LookupParameterStorage(unsigned n, const Dim& d) : dim(d), all_updated(false) { + all_dim = dim; all_dim.d[all_dim.nd++] = n; + all_grads.d = all_values.d = all_dim; + all_grads.device = all_values.device = default_device; + default_device->allocate_tensor(DeviceMempool::PS, all_values); + default_device->allocate_tensor(DeviceMempool::PS, all_grads); + ParameterInitGlorot init(true); + init.initialize_params(all_values); + initialize_lookups(); +} + +LookupParameterStorage::LookupParameterStorage(unsigned n, const Dim& d, const ParameterInit & init) : dim(d), all_updated(false) { + all_dim = dim; all_dim.d[all_dim.nd++] = n; + all_grads.d = all_values.d = all_dim; + all_grads.device = all_values.device = default_device; + default_device->allocate_tensor(DeviceMempool::PS, all_values); + default_device->allocate_tensor(DeviceMempool::PS, all_grads); + init.initialize_params(all_values); + initialize_lookups(); +} + +void LookupParameterStorage::initialize_lookups() { + int num = all_dim[all_dim.nd - 1]; + dim = all_dim; dim.nd--; + int dim_size = dim.size(); + if (values.size() == 0) { + values.resize(num); + for (int i = 0; i < num; ++i) + values[i] = Tensor(dim, all_values.v + i * dim_size, all_values.device, all_values.mem_pool); + } + if (grads.size() == 0 && all_grads.v != nullptr) { + grads.resize(num); + for (int i = 0; i < num; ++i) + grads[i] = Tensor(dim, all_grads.v + i * dim_size, all_grads.device, all_grads.mem_pool); + } +} + +void LookupParameterStorage::zero() { + TensorTools::zero(all_values); +} + +size_t LookupParameterStorage::size() const { + return all_dim.size(); +} + +void LookupParameterStorage::copy(const LookupParameterStorage& param) { + if(all_dim != param.all_dim) + DYNET_INVALID_ARG("Attempt to copy between lookup parameters with mismatched dimensions: " << all_dim << " != " << param.all_dim); + TensorTools::copy_elements(all_values, param.all_values); +} + +void LookupParameterStorage::clear() { + // TODO: 
the GPU part is hacky, probably need a better heuristic + if (all_grads.device->type == DeviceType::GPU || all_updated) { + TensorTools::zero(all_grads); + } else { + for (auto i : non_zero_grads) + TensorTools::zero(grads[i]); + } + non_zero_grads.clear(); + all_updated = false; +} + +#ifndef __CUDACC__ +DYNET_SERIALIZE_SAVE_COMMIT(LookupParameterStorage, + DYNET_SERIALIZE_DERIVED_DEFINE(ParameterStorageBase, all_dim, all_values, all_grads)) +DYNET_SERIALIZE_LOAD_COMMIT(LookupParameterStorage, LOAD_INIT_FUNC(), + DYNET_SERIALIZE_DERIVED_DEFINE(ParameterStorageBase, all_dim, all_values, all_grads)) +DYNET_SAVELOAD_IMPL(LookupParameterStorage) +#endif + +void ParameterInitNormal::initialize_params(Tensor & values) const { + TensorTools::randomize_normal(values, mean, sqrt(var)); +} + +void ParameterInitUniform::initialize_params(Tensor & values) const { + TensorTools::randomize_uniform(values, left, right); +} + +void ParameterInitConst::initialize_params(Tensor & values) const { + TensorTools::constant(values, cnst); +} + +void ParameterInitIdentity::initialize_params(Tensor & values) const { + TensorTools::identity(values); +} + +void ParameterInitGlorot::initialize_params(Tensor & values) const { + int dims = 0, dim_len = values.d.nd - (lookup ? 
1 : 0); + for (int i = 0; i < dim_len; ++i) dims += values.d[i]; + float my_scale = gain * sqrt(6) / sqrt(dims); + TensorTools::randomize_uniform(values, -my_scale, my_scale); +} + +void ParameterInitSaxe::initialize_params(Tensor & values) const { + if (values.device->type == DeviceType::GPU) + throw std::runtime_error("Saxe initialization not implemented for CUDA (we welcome pull requests)"); + else + TensorTools::randomize_orthonormal(values, gain); +} + + +void ParameterInitFromVector::initialize_params(Tensor & values) const { + TensorTools::set_elements(values, vec); +} + +void ParameterInitFromFile::initialize_params(Tensor & values) const { + ifstream is(filename); + istream_iterator start(is), end; + vector param_vector(start, end); + TensorTools::set_elements(values, param_vector); +} + + +Parameter::Parameter() { + mp = nullptr; + index = 0; +} + +Parameter::Parameter(Model* mp, unsigned long index) : mp(mp), index(index) {} + +ParameterStorage* Parameter::get() const { + return mp->parameters_list()[index]; +} +void Parameter::clip_inplace(float left, float right){ + float my_scale = 1./ mp->weight_decay.current_weight_decay(); + get()->clip(left * my_scale, right * my_scale); +} +void Parameter::zero() { + return mp->parameters_list()[index]->zero(); +} + +void Parameter::set_updated(bool b) { + mp->set_updated_param(this, b); +} + +bool Parameter::is_updated() { + return mp->is_updated_param(this); +} + + +#ifndef __CUDACC__ +DYNET_SERIALIZE_COMMIT(Parameter, DYNET_SERIALIZE_DEFINE(mp, index)) +DYNET_SERIALIZE_IMPL(Parameter) +#endif + +LookupParameter::LookupParameter() { + mp = nullptr; + index = 0; +} + +LookupParameter::LookupParameter(Model* mp, unsigned long index) : mp(mp), index(index) {} + +LookupParameterStorage* LookupParameter::get() const { + return mp->lookup_parameters_list()[index]; +} + +void LookupParameter::zero() { + return mp->lookup_parameters_list()[index]->zero(); +} + +void LookupParameter::initialize(unsigned index, const 
std::vector& val) const { + get()->initialize(index, val); +} + +void LookupParameter::set_updated(bool b) { + mp->set_updated_lookup_param(this, b); +} +bool LookupParameter::is_updated() { + return mp->is_updated_lookup_param(this); +} + +#ifndef __CUDACC__ +DYNET_SERIALIZE_COMMIT(LookupParameter, DYNET_SERIALIZE_DEFINE(mp, index)) +DYNET_SERIALIZE_IMPL(LookupParameter) +#endif + +Model::Model() : gradient_norm_scratch(nullptr) { + weight_decay.set_lambda(weight_decay_lambda); +} + +Model::~Model() { + for (auto p : all_params) delete p; + if (gradient_norm_scratch) + default_device->mem->free(gradient_norm_scratch); +} + +void Model::set_weight_decay_lambda(float lambda) { + weight_decay.set_lambda(lambda); +} + +void Model::project_weights(float radius) { + static float* project_scratch = 0; + if (!project_scratch) + project_scratch = (float*)default_device->mem->malloc(all_params.size() * sizeof(float)); + int pi = 0; + for (auto p : all_params) { + p->squared_l2norm(&project_scratch[pi]); + ++pi; + } + double gg = 0; + for (int i = 0; i < pi; ++i) + gg += project_scratch[i]; + cerr << "NORM: " << sqrt(gg) << endl; +} + +Parameter Model::add_parameters(const Dim& d, float scale) { + ParameterStorage* p = new ParameterStorage(d, scale); + Parameter r(this, params.size()); + //cerr << "Adding parameters with dim " << d << endl; + all_params.push_back(p); + params.push_back(p); + updated_params.push_back(r.index); + return r; +} + +Parameter Model::add_parameters(const Dim& d, const ParameterInit & init) { + ParameterStorage* p = new ParameterStorage(d, init); + Parameter r(this, params.size()); + //cerr << "Adding parameters with dim " << d << endl; + all_params.push_back(p); + params.push_back(p); + updated_params.push_back(r.index); + return r; +} + + +LookupParameter Model::add_lookup_parameters(unsigned n, const Dim& d) { + LookupParameterStorage* p = new LookupParameterStorage(n, d); + LookupParameter r(this, lookup_params.size()); + //cerr << "Adding 
lookup parameters with dim " << d << " and size " << n << endl; + all_params.push_back(p); + lookup_params.push_back(p); + updated_lookup_params.push_back(r.index); + return r; +} + +LookupParameter Model::add_lookup_parameters(unsigned n, const Dim& d, const ParameterInit & init) { + LookupParameterStorage* p = new LookupParameterStorage(n, d, init); + LookupParameter r(this, lookup_params.size()); + //cerr << "Adding lookup parameters with dim " << d << " and size " << n << endl; + all_params.push_back(p); + lookup_params.push_back(p); + updated_lookup_params.push_back(r.index); + return r; +} + +void Model::set_updated_param(const Parameter *p, bool status) { + unsigned idx = p->index; + DYNET_ASSERT(idx < params.size(), "Parameter ID " << idx << " is less than parameter size " << params.size()); + + auto position = std::find(updated_params.begin(), updated_params.end(), idx); + if (position == updated_params.end()) { + if (status) updated_params.push_back(idx); + } else { + if (!status) updated_params.erase(position); + } +} + +void Model::set_updated_lookup_param(const LookupParameter *p, bool status) { + unsigned idx = p->index; + DYNET_ASSERT(idx < lookup_params.size(), "LookupParameter ID " << idx << " is less than lookup parameter size " << lookup_params.size()); + + auto position = std::find(updated_lookup_params.begin(), updated_lookup_params.end(), idx); + if (position == updated_lookup_params.end()) { + if (status) updated_lookup_params.push_back(idx); + } else { + if (!status) updated_lookup_params.erase(position); + } +} + +bool Model::is_updated_param(const Parameter* p) { + auto position = std::find(updated_params.begin(), updated_params.end(), p->index); + return position != updated_params.end(); +} + +bool Model::is_updated_lookup_param(const LookupParameter* p) { + auto position = std::find(updated_lookup_params.begin(), updated_lookup_params.end(), p->index); + return position != updated_lookup_params.end(); +} + +void Model::reset_gradient() { 
+ for (auto p : params) { p->clear(); } + for (auto p : lookup_params) { p->clear(); } +} + +size_t Model::parameter_count() const { + size_t r = 0; + for (const ParameterStorageBase* param : all_params) { + r += param->size(); + } + return r; +} + +size_t Model::updated_parameter_count() const { + size_t r = 0; + for (const unsigned idx : updated_params) { + r += params[idx]->size(); + } + for (const unsigned idx : updated_lookup_params) { + r += lookup_params[idx]->size(); + } + return r; +} + +#ifndef __CUDACC__ +DYNET_SERIALIZE_COMMIT(Model, + DYNET_SERIALIZE_DEFINE(all_params, params, + lookup_params, weight_decay, + updated_params, updated_lookup_params)) +DYNET_SERIALIZE_IMPL(Model) +#endif + +void save_dynet_model(std::string filename, Model* model) { + std::ofstream out(filename); + boost::archive::text_oarchive oa(out); + oa << (*model); +}; + +void load_dynet_model(std::string filename, Model* model) { + std::ifstream in(filename); + boost::archive::text_iarchive ia(in); + ia >> (*model); +}; + +#endif + +// CPU/GPU code +// TODO: It's a bit annoying to re-implement the CPU/GPU control code for each +// function, but it's not clear how to handle heterogeneous functions w/ +// macros + +// Note: Using DeviceMempool::NONE here because these tensors are not persistent +// and won't be saved so it doesn't matter which mempool they belong to. 
+ +// Take the squared norm +template +void ParameterStorage::squared_l2norm_dev(MyDevice & dev, float* sqnorm) const { + Tensor sqnorm_t({1}, sqnorm, &dev, DeviceMempool::NONE); + sqnorm_t.t<0>().device(*dev.edevice) = values.tvec().square().sum(); +} +DYNET_PARAMNORM_INST_DEV_IMPL(ParameterStorage, squared_l2norm, squared_l2norm_dev) + +// Take the squared norm of the gradient +template +void ParameterStorage::g_squared_l2norm_dev(MyDevice & dev, float* sqnorm) const { + DYNET_ASSERT(g.v != nullptr, "Cannot take norm of gradient with null parameter"); + Tensor sqnorm_t({1}, sqnorm, &dev, DeviceMempool::NONE); + sqnorm_t.t<0>().device(*dev.edevice) = g.tvec().square().sum(); +} +DYNET_PARAMNORM_INST_DEV_IMPL(ParameterStorage, g_squared_l2norm, g_squared_l2norm_dev) + +template +void ParameterStorage::accumulate_grad_dev(MyDevice & dev, const Tensor& d) { + g.tvec().device(*dev.edevice) += d.tvec(); +} +#ifdef __CUDACC__ +template void ParameterStorage::accumulate_grad_dev(Device_GPU & dev, const Tensor& d); +#elif defined(HAVE_CUDA) +extern template void ParameterStorage::accumulate_grad_dev(Device_GPU & dev, const Tensor& d); +template void ParameterStorage::accumulate_grad_dev(Device_CPU & dev, const Tensor& d); +void ParameterStorage::accumulate_grad(const Tensor& d) { + if (values.device->type == DeviceType::CPU) { accumulate_grad_dev(*(Device_CPU*)values.device, d); } + else if (values.device->type == DeviceType::GPU) { accumulate_grad_dev(*(Device_GPU*)values.device, d); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void ParameterStorage::accumulate_grad_dev(Device_CPU & dev, const Tensor& d); +void ParameterStorage::accumulate_grad(const Tensor& d) { + if (values.device->type == DeviceType::CPU) { accumulate_grad_dev(*(Device_CPU*)values.device, d); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void ParameterStorage::scale_parameters_dev(MyDevice & dev, float a) { + 
values.tvec().device(*dev.edevice) = values.tvec() * a; +} +#ifdef __CUDACC__ +template void ParameterStorage::scale_parameters_dev(Device_GPU & dev, float a); +#elif defined(HAVE_CUDA) +extern template void ParameterStorage::scale_parameters_dev(Device_GPU & dev, float a); +template void ParameterStorage::scale_parameters_dev(Device_CPU & dev, float a); +void ParameterStorage::scale_parameters(float a) { + if (values.device->type == DeviceType::CPU) { scale_parameters_dev(*(Device_CPU*)values.device, a); } + else if (values.device->type == DeviceType::GPU) { scale_parameters_dev(*(Device_GPU*)values.device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void ParameterStorage::scale_parameters_dev(Device_CPU & dev, float a); +void ParameterStorage::scale_parameters(float a) { + if (values.device->type == DeviceType::CPU) { scale_parameters_dev(*(Device_CPU*)values.device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void ParameterStorage::scale_gradient_dev(MyDevice & dev, float a) { + g.tvec().device(*dev.edevice) = g.tvec() * a; +} +#ifdef __CUDACC__ +template void ParameterStorage::scale_gradient_dev(Device_GPU & dev, float a); +#elif defined(HAVE_CUDA) +extern template void ParameterStorage::scale_gradient_dev(Device_GPU & dev, float a); +template void ParameterStorage::scale_gradient_dev(Device_CPU & dev, float a); +void ParameterStorage::scale_gradient(float a) { + if (g.device->type == DeviceType::CPU) { scale_gradient_dev(*(Device_CPU*)g.device, a); } + else if (g.device->type == DeviceType::GPU) { scale_gradient_dev(*(Device_GPU*)g.device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void ParameterStorage::scale_gradient_dev(Device_CPU & dev, float a); +void ParameterStorage::scale_gradient(float a) { + if (g.device->type == DeviceType::CPU) { scale_gradient_dev(*(Device_CPU*)g.device, a); } + else { throw std::runtime_error("Bad device type"); 
} +} +#endif + +template +void LookupParameterStorage::initialize_dev(MyDevice & dev, unsigned index, const vector& val) { + DYNET_ARG_CHECK(int(val.size()) == int(dim.size()), + "Attempt to initialize LookupParameters with vector of wrong size " + "(" << val.size() << " != " << dim.size() << ")"); +#ifdef __CUDACC__ + cudaMemcpyAsync(values[index].v, &val[0], val.size() * sizeof(float), cudaMemcpyHostToDevice); +#else + memcpy(values[index].v, &val[0], val.size() * sizeof(float)); +#endif +} +#ifdef __CUDACC__ +template void LookupParameterStorage::initialize_dev(Device_GPU & dev, unsigned index, const vector& val); +#elif defined(HAVE_CUDA) +extern template void LookupParameterStorage::initialize_dev(Device_GPU & dev, unsigned index, const vector& val); +template void LookupParameterStorage::initialize_dev(Device_CPU & dev, unsigned index, const vector& val); +void LookupParameterStorage::initialize(unsigned index, const vector& val) { + if (values[index].device->type == DeviceType::CPU) { initialize_dev(*(Device_CPU*)values[index].device, index, val); } + else if (values[index].device->type == DeviceType::GPU) { initialize_dev(*(Device_GPU*)values[index].device, index, val); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void LookupParameterStorage::initialize_dev(Device_CPU & dev, unsigned index, const vector& val); +void LookupParameterStorage::initialize(unsigned index, const vector& val) { + if (values[index].device->type == DeviceType::CPU) { initialize_dev(*(Device_CPU*)values[index].device, index, val); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void LookupParameterStorage::squared_l2norm_dev(MyDevice & dev, float* sqnorm) const { + Tensor sqnorm_t({1}, sqnorm, &dev, DeviceMempool::NONE); + sqnorm_t.t<0>().device(*dev.edevice) = all_values.tvec().square().sum(); +} +DYNET_PARAMNORM_INST_DEV_IMPL(LookupParameterStorage, squared_l2norm, squared_l2norm_dev) + +template +void 
LookupParameterStorage::g_squared_l2norm_dev(MyDevice & dev, float* sqnorm) const { + Tensor sqnorm_t({1}, sqnorm, &dev, DeviceMempool::NONE); + TensorTools::zero(sqnorm_t); + // TODO: the GPU part is hacky, probably need a better heuristic + if (all_grads.device->type == DeviceType::GPU || all_updated) { + sqnorm_t.t<0>().device(*dev.edevice) += all_grads.tvec().square().sum(); + } else { + auto it = non_zero_grads.begin(); + while (it != non_zero_grads.end()) + sqnorm_t.t<0>().device(*dev.edevice) += grads[*(it++)].tvec().square().sum(); + } +} +DYNET_PARAMNORM_INST_DEV_IMPL(LookupParameterStorage, g_squared_l2norm, g_squared_l2norm_dev) + +template +void LookupParameterStorage::accumulate_grad_dev(MyDevice & dev, const Tensor& d) { + all_updated = true; + all_grads.tvec().device(*dev.edevice) += d.tvec(); +} +#ifdef __CUDACC__ +template void LookupParameterStorage::accumulate_grad_dev(Device_GPU & dev, const Tensor& d); +#elif defined(HAVE_CUDA) +extern template void LookupParameterStorage::accumulate_grad_dev(Device_GPU & dev, const Tensor& d); +template void LookupParameterStorage::accumulate_grad_dev(Device_CPU & dev, const Tensor& d); +void LookupParameterStorage::accumulate_grad(const Tensor& d) { + if (all_values.device->type == DeviceType::CPU) { accumulate_grad_dev(*(Device_CPU*)all_values.device, d); } + else if (all_values.device->type == DeviceType::GPU) { accumulate_grad_dev(*(Device_GPU*)all_values.device, d); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void LookupParameterStorage::accumulate_grad_dev(Device_CPU & dev, const Tensor& d); +void LookupParameterStorage::accumulate_grad(const Tensor& d) { + if (all_values.device->type == DeviceType::CPU) { accumulate_grad_dev(*(Device_CPU*)all_values.device, d); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void LookupParameterStorage::accumulate_grad_dev(MyDevice & dev, unsigned index, const Tensor& d) { + 
non_zero_grads.insert(index); + grads[index].tvec().device(*dev.edevice) += d.tvec(); +} +#ifdef __CUDACC__ +template void LookupParameterStorage::accumulate_grad_dev(Device_GPU & dev, unsigned index, const Tensor& d); +#elif defined(HAVE_CUDA) +extern template void LookupParameterStorage::accumulate_grad_dev(Device_GPU & dev, unsigned index, const Tensor& d); +template void LookupParameterStorage::accumulate_grad_dev(Device_CPU & dev, unsigned index, const Tensor& d); +void LookupParameterStorage::accumulate_grad(unsigned index, const Tensor& d) { + if (values[index].device->type == DeviceType::CPU) { accumulate_grad_dev(*(Device_CPU*)values[index].device, index, d); } + else if (values[index].device->type == DeviceType::GPU) { accumulate_grad_dev(*(Device_GPU*)values[index].device, index, d); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void LookupParameterStorage::accumulate_grad_dev(Device_CPU & dev, unsigned index, const Tensor& d); +void LookupParameterStorage::accumulate_grad(unsigned index, const Tensor& d) { + if (values[index].device->type == DeviceType::CPU) { accumulate_grad_dev(*(Device_CPU*)values[index].device, index, d); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void LookupParameterStorage::accumulate_grads_dev(MyDevice & dev, unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g) { +#ifdef __CUDACC__ + for (unsigned i = 0; i < n; ++i) + non_zero_grads.insert(ids_host[i]); + dynet::gpu::dense_to_sparse_block_add(n, ids_dev, dim.size(), g, all_grads.v); +#else + size_t gsize = dim.size(); + Tensor gt(dim, g, all_grads.device, all_grads.mem_pool); + for (unsigned i = 0; i < n; ++i) { + non_zero_grads.insert(ids_host[i]); + grads[ids_host[i]].tvec().device(*dev.edevice) += gt.tvec(); + gt.v += gsize; + } +#endif +} +#ifdef __CUDACC__ +template void LookupParameterStorage::accumulate_grads_dev(Device_GPU & dev, unsigned n, const unsigned* ids_host, const unsigned* 
ids_dev, float* g); +#elif defined(HAVE_CUDA) +extern template void LookupParameterStorage::accumulate_grads_dev(Device_GPU & dev, unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g); +template void LookupParameterStorage::accumulate_grads_dev(Device_CPU & dev, unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g); +void LookupParameterStorage::accumulate_grads(unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g) { + if (all_values.device->type == DeviceType::CPU) { accumulate_grads_dev(*(Device_CPU*)all_values.device, n, ids_host, ids_dev, g); } + else if (all_values.device->type == DeviceType::GPU) { accumulate_grads_dev(*(Device_GPU*)all_values.device, n, ids_host, ids_dev, g); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void LookupParameterStorage::accumulate_grads_dev(Device_CPU & dev, unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g); +void LookupParameterStorage::accumulate_grads(unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g) { + if (all_values.device->type == DeviceType::CPU) { accumulate_grads_dev(*(Device_CPU*)all_values.device, n, ids_host, ids_dev, g); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void LookupParameterStorage::scale_parameters_dev(MyDevice & dev, float a) { + all_values.tvec().device(*dev.edevice) = all_values.tvec() * a; +} +#ifdef __CUDACC__ +template void LookupParameterStorage::scale_parameters_dev(Device_GPU & dev, float a); +#elif defined(HAVE_CUDA) +extern template void LookupParameterStorage::scale_parameters_dev(Device_GPU & dev, float a); +template void LookupParameterStorage::scale_parameters_dev(Device_CPU & dev, float a); +void LookupParameterStorage::scale_parameters(float a) { + if (values[0].device->type == DeviceType::CPU) { scale_parameters_dev(*(Device_CPU*)values[0].device, a); } + else if (values[0].device->type == DeviceType::GPU) { 
scale_parameters_dev(*(Device_GPU*)values[0].device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void LookupParameterStorage::scale_parameters_dev(Device_CPU & dev, float a); +void LookupParameterStorage::scale_parameters(float a) { + if (values[0].device->type == DeviceType::CPU) { scale_parameters_dev(*(Device_CPU*)values[0].device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +void LookupParameterStorage::scale_gradient_dev(MyDevice & dev, float a) { + all_grads.tvec().device(*dev.edevice) = all_grads.tvec() * a; +} +#ifdef __CUDACC__ +template void LookupParameterStorage::scale_gradient_dev(Device_GPU & dev, float a); +#elif defined(HAVE_CUDA) +extern template void LookupParameterStorage::scale_gradient_dev(Device_GPU & dev, float a); +template void LookupParameterStorage::scale_gradient_dev(Device_CPU & dev, float a); +void LookupParameterStorage::scale_gradient(float a) { + if (grads[0].device->type == DeviceType::CPU) { scale_gradient_dev(*(Device_CPU*)grads[0].device, a); } + else if (grads[0].device->type == DeviceType::GPU) { scale_gradient_dev(*(Device_GPU*)grads[0].device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template void LookupParameterStorage::scale_gradient_dev(Device_CPU & dev, float a); +void LookupParameterStorage::scale_gradient(float a) { + if (grads[0].device->type == DeviceType::CPU) { scale_gradient_dev(*(Device_CPU*)grads[0].device, a); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +template +float Model::gradient_l2_norm_dev(MyDevice & dev) const { + if (!gradient_norm_scratch) + gradient_norm_scratch = (float*)default_device->mem->malloc((all_params.size() + 1) * sizeof(float)); + size_t pi; + for (pi = 0; pi < all_params.size(); ++pi) + all_params[pi]->g_squared_l2norm(&gradient_norm_scratch[pi]); + Tensor scratch_t({(unsigned int)all_params.size()}, gradient_norm_scratch, &dev, DeviceMempool::NONE); 
+ Tensor sum_t({1}, gradient_norm_scratch + pi, &dev, DeviceMempool::NONE); + sum_t.t<0>().device(*dev.edevice) = scratch_t.t<1>().sum().sqrt(); +#ifdef __CUDACC__ + float res = 0; + cudaMemcpy(&res, gradient_norm_scratch + pi, sizeof(float), cudaMemcpyDeviceToHost); + return res; +#else + return gradient_norm_scratch[pi]; +#endif +} +#ifdef __CUDACC__ +template float Model::gradient_l2_norm_dev(Device_GPU & dev) const; +#elif defined(HAVE_CUDA) +extern template float Model::gradient_l2_norm_dev(Device_GPU & dev) const; +template float Model::gradient_l2_norm_dev(Device_CPU & dev) const; +float Model::gradient_l2_norm() const { + if (default_device->type == DeviceType::CPU) { return gradient_l2_norm_dev(*(Device_CPU*)default_device); } + else if (default_device->type == DeviceType::GPU) { return gradient_l2_norm_dev(*(Device_GPU*)default_device); } + else { throw std::runtime_error("Bad device type"); } +} +#else +template float Model::gradient_l2_norm_dev(Device_CPU & dev) const; +float Model::gradient_l2_norm() const { + if (default_device->type == DeviceType::CPU) { return gradient_l2_norm_dev(*(Device_CPU*)default_device); } + else { throw std::runtime_error("Bad device type"); } +} +#endif + +} // namespace dynet + +#ifndef __CUDACC__ +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::ParameterStorage) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::LookupParameterStorage) +#endif diff --git a/thirdparty/dynet/dynet/model.h b/thirdparty/dynet/dynet/model.h new file mode 100644 index 000000000..40ca586c2 --- /dev/null +++ b/thirdparty/dynet/dynet/model.h @@ -0,0 +1,727 @@ +/** + * \file model.h + * \defgroup params params + * + */ + +#ifndef DYNET_PARAMS_H_ +#define DYNET_PARAMS_H_ + +#include +#include +#include +#include +#include +#include + +#include "dynet/io-macros.h" +#include "dynet/tensor.h" +#include "dynet/weight-decay.h" + +namespace dynet { + +// to deal with sparse updates, there are two parameter classes: +// * Parameters represents a vector, matrix, (eventually higher 
order tensors) +// of parameters. These are densely updated. +// * LookupParameters represents a table of vectors that are used to embed a +// set of discrete objects. These are sparsely updated. + +struct ParameterInit; + +/** + * \ingroup params + * @brief This is the base class for ParameterStorage and LookupParameterStorage, the objects handling the actual parameters. + * @details You can access the storage from any Parameter (resp. LookupParameter) class, use it only to do low level manipulations. + * + */ +struct ParameterStorageBase { + friend class Model; + /** + * @brief Scale the parameters + * + * @param a scale factor + */ + virtual void scale_parameters(float a) = 0; + /** + * @brief Scale the gradient + * + * @param a scale factor + */ + virtual void scale_gradient(float a) = 0; + /** + * @brief Set the parameters to 0 + */ + virtual void zero() = 0; + /** + * @brief Get the parameter squared l2 norm + * + * @param sqnorm Pointer to the float holding the result + */ + virtual void squared_l2norm(float* sqnorm) const = 0; + /** + * @brief Get the squared l2 norm of the gradient w.r.t. 
these parameters + * + * @param sqnorm Pointer to the float holding the result + */ + virtual void g_squared_l2norm(float* sqnorm) const = 0; + /** + * @brief Get the size (number of scalar parameters) + * @return Number of scalar parameters + */ + virtual size_t size() const = 0; + virtual ~ParameterStorageBase(); + DYNET_SERIALIZE_COMMIT_EMPTY() +}; + +// represents parameters (e.g., a weight matrix) that will be optimized +/** + * \ingroup params + * \brief Storage class for Parameters + */ +struct ParameterStorage : public ParameterStorageBase { + friend class Model; + template + void scale_parameters_dev(MyDevice & dev, float a); + void scale_parameters(float a) override; + template + void scale_gradient_dev(MyDevice & dev, float a); + void scale_gradient(float a) override; + void zero() override; + template + void squared_l2norm_dev(MyDevice & dev, float* sqnorm) const; + void squared_l2norm(float* sqnorm) const override; + template + void g_squared_l2norm_dev(MyDevice & dev, float* sqnorm) const; + void g_squared_l2norm(float* sqnorm) const override; + size_t size() const override; + /** + * @brief Copy from another ParameterStorage + * + * @param val ParameterStorage to copy from + */ + void copy(const ParameterStorage & val); + template + void accumulate_grad_dev(MyDevice & dev, const Tensor& g); + /** + * @brief Add a tensor to the gradient + * @details After this method gets called, g <- g + d + * + * @param g Tensor to add + */ + void accumulate_grad(const Tensor& g); + /** + * @brief Clear the gradient (set it to 0) + */ + void clear(); + /** + * @brief Clip the values to the range [left, right] + */ + void clip(float left, float right); + + Dim dim; /**< Dimensions of the parameter tensor*/ + Tensor values;/**< Values of the parameter */ + Tensor g;/**< Values of the gradient w.r.t. 
this parameter */ + +private: + ParameterStorage() {} + explicit ParameterStorage(const Dim& d, float minmax); // initialize with ~U(-minmax,+minmax) + // or Glorot initialization if minmax = 0 + explicit ParameterStorage(const Dim& d, const ParameterInit & init); // initialize with custom initializer + DYNET_SERIALIZE_DECLARE() +}; + +// represents a matrix/vector embedding of a discrete set +/** + * \ingroup params + * \brief Storage class for LookupParameters + * + */ +struct LookupParameterStorage : public ParameterStorageBase { + friend class Model; + template + void scale_parameters_dev(MyDevice & dev, float a); + void scale_parameters(float a) override; + template + void scale_gradient_dev(MyDevice & dev, float a); + void scale_gradient(float a) override; + void zero() override; + template + void squared_l2norm_dev(MyDevice & dev, float* sqnorm) const; + void squared_l2norm(float* sqnorm) const override; + template + void g_squared_l2norm_dev(MyDevice & dev, float* sqnorm) const; + void g_squared_l2norm(float* sqnorm) const override; + size_t size() const override; + template + void initialize_dev(MyDevice & dev, unsigned index, const std::vector& val); + /** + * @brief Initialize one particular lookup + * + * @param index Index of the lookup to initialize + * @param val Values + */ + void initialize(unsigned index, const std::vector& val); + + /** + * @brief Copy from another LookupParameterStorage + * + * @param val Other LookupParameterStorage to copy from + */ + void copy(const LookupParameterStorage & val); + + template + void accumulate_grad_dev(MyDevice & dev, const Tensor& g); + /** + * @brief Add a Tensor to the gradient of the whole lookup matrix + * @details after this `grads<-grads + g` + * + * @param g Tensor to add to the gradient + */ + void accumulate_grad(const Tensor& g); + + template + void accumulate_grad_dev(MyDevice & dev, unsigned index, const Tensor& g); + /** + * @brief Add a Tensor to the gradient of one of the lookups + * @details after this
`grads[index]<-grads[index] + g` + * + * @param index [description] + * @param g [description] + */ + void accumulate_grad(unsigned index, const Tensor& g); + template + void accumulate_grads_dev(MyDevice & dev, unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g); + /** + * @brief Add tensors to multiple lookups + * @details After this method gets called, `grads[ids_host[i]] <- grads[ids_host[i]] + g[i*dim.size():(i+1)*dim.size()]` + * + * @param n size of `ids_host` + * @param ids_host Indices of the gradients to update + * @param ids_dev [To be documented] (only for GPU) + * @param g Values + */ + void accumulate_grads(unsigned n, const unsigned* ids_host, const unsigned* ids_dev, float* g); + void clear(); + + // Initialize each individual lookup from the overall tensors + void initialize_lookups(); + + // Tensors for all dimensions at once + Dim all_dim; /**< Total dimension */ + Tensor all_values; /**< Values for all dimensions at once */ + Tensor all_grads; /**< Gradient values for all dimensions at once */ + // Tensors for each individual lookup + Dim dim; /**< Dimension for one lookup */ + std::vector values; /**< List of values for each lookup */ + std::vector grads; /**< List of gradient values for each lookup */ + // gradients are sparse, so track which components are nonzero + std::unordered_set non_zero_grads; /**< Gradients are sparse, so track which components are nonzero */ + bool all_updated; /**< Whether all of the gradients have been updated. */ +private: + LookupParameterStorage() : all_updated(false) {} + LookupParameterStorage(unsigned n, const Dim& d); + LookupParameterStorage(unsigned n, const Dim& d, const ParameterInit & init); + DYNET_SERIALIZE_SPLIT_DECLARE() +}; + +class Model; +/** + * \ingroup params + * \brief Object representing a trainable parameter + * \details This object acts as a high level component linking the actual parameter values (ParameterStorage) and the Model.
As long as you don't want to do low level hacks at the ParameterStorage level, this is what you will use. + * + */ +struct Parameter { + /** + * @brief Default constructor + */ + Parameter(); + /** + * @brief Constructor + * @details This is called by the model, you shouldn't need to use it + * + * @param mp Pointer to th model + * @param index Id of the parameter + */ + Parameter(Model* mp, unsigned long index); + /** + * @brief Get underlying ParameterStorage object + * @return ParameterStorage holding the parameter values + */ + ParameterStorage* get() const; + + /** + * \brief Zero the parameters + */ + void zero(); + + Model* mp;/**< Pointer to the Model holding this parameter */ + unsigned long index;/**< Index of this parameter in its Model*/ + + /** + * \brief Shape of the parameter + * + * \return Shape as a `Dim` object + */ + Dim dim() const { return get()->dim; } + + /** + * \brief Values of the parameter + * + * \return Values as a `Tensor` object + */ + Tensor* values() { return &(get()->values); } + + /** + * @brief Set the parameter as updated + * + * @param b Update status + */ + void set_updated(bool b); + + /** + * @brief Scales the parameter (multiplies by `s`) + * + * @param s scale + */ + void scale(float s){get()->scale_parameters(s);} + + + /** + * @brief Scales the gradient (multiplies by `s`) + * + * @param s scale + */ + void scale_gradient(float s){get()->scale_gradient(s);} + + /** + * @brief Check the update status + * @return Update status + */ + bool is_updated(); + /** + * @brief Clip the values of the parameter to the range [left, right] (in place) + */ + void clip_inplace(float left, float right); +private: + DYNET_SERIALIZE_DECLARE() +}; + +/** + * \ingroup params + * \brief Object representing a trainable lookup parameter + * + */ +struct LookupParameter { + LookupParameter(); + LookupParameter(Model* mp, unsigned long index); + /** + * @brief Get underlying LookupParameterStorage object + * @return LookupParameterStorage 
holding the parameter values + */ + LookupParameterStorage* get() const; + /** + * @brief Initialize one particular column + * + * @param index Index of the column to be initialized + * @param val Values to initialize the column with + */ + void initialize(unsigned index, const std::vector& val) const; + + /** + * \brief Zero the parameters + */ + void zero(); + + Model* mp;/**< Pointer to the Model holding this parameter */ + unsigned long index;/**< Index of this parameter in its Model*/ + + /** + * \brief Shape of the lookup parameter + * + * \return Shape as a `Dim` object + */ + Dim dim() const { return get()->dim; } + /** + * \brief Values of the lookup parameter + * + * \return Pointer to the list of values, one entry per lookup + */ + std::vector* values() { return &(get()->values); } + + /** + * @brief Scales the parameter (multiplies by `s`) + * + * @param s scale + */ + void scale(float s){get()->scale_parameters(s);} + + /** + * @brief Scales the gradient (multiplies by `s`) + * + * @param s scale + */ + void scale_gradient(float s){get()->scale_gradient(s);} + + /** + * @brief Set the parameter as updated + * + * @param b Update status + */ + void set_updated(bool b); + /** + * @brief Check the update status + * @return Update status + */ + bool is_updated(); + +private: + DYNET_SERIALIZE_DECLARE() +}; + +/** + * \ingroup params + * \brief Initializers for parameters + * \details Allows for custom parameter initialization + */ +struct ParameterInit { + /** + * \brief Default constructor + */ + ParameterInit() {} + virtual ~ParameterInit() {} + /** + * \brief Function called upon initialization + * \details Whenever you inherit this struct to implement your own custom initializer, this is the function you want to overload to implement your logic. + * + * \param values The tensor to be initialized. You should modify it in-place.
See dynet/model.cc for some examples + */ + virtual void initialize_params(Tensor & values) const = 0; +}; + +/** + * \ingroup params + * \brief Initialize parameters with samples from a normal distribution + */ +struct ParameterInitNormal : public ParameterInit { + /** + * \brief Constructor + * + * \param m Mean of the gaussian distribution + * \param v Variance of the gaussian distribution (reminder : the variance is the __square__ of the standard deviation) + */ + ParameterInitNormal(float m = 0.0f, float v = 1.0f) : mean(m), var(v) {} + virtual void initialize_params(Tensor & values) const override; +private: + float mean, var; +}; + +/** + * \ingroup params + * \brief Initialize parameters with samples from a uniform distribution + * + */ +struct ParameterInitUniform : public ParameterInit { + /** + * \brief Constructor for uniform distribution centered on 0 + * \details Samples parameters from \f$\mathcal U([-\mathrm{scale},+\mathrm{scale}])\f$ + * \param scale Scale of the distribution (must be non-zero, otherwise `std::domain_error` is thrown) + */ + ParameterInitUniform(float scale) : + left(-scale), right(scale) { if (scale == 0.0f) throw std::domain_error("Scale of the uniform distribution cannot be 0 in ParameterInitUniform"); } + /** + * \brief Constructor for uniform distribution in a specific interval + * \details Samples parameters from \f$\mathcal U([l,r])\f$; throws `std::domain_error` if `l == r` + * + * \param l Lower bound of the interval + * \param r Upper bound of the interval + */ + ParameterInitUniform(float l, float r) : left(l), right(r) { if (l == r) throw std::domain_error("Empty interval in ParameterInitUniform"); } + virtual void initialize_params(Tensor & values) const override; +private: + float left, right; +}; + +/** + * \ingroup params + * \brief Initialize parameters with a constant value + */ +struct ParameterInitConst : public ParameterInit { + /** + * \brief Constructor + * + * \param c Constant value + */ + ParameterInitConst(float c) : cnst(c) {} + virtual void initialize_params(Tensor & values) const override; +private: + float
cnst; +}; + +/** + * \ingroup params + * \brief Initialize as the identity + * \details This will raise an exception if used on non square matrices + */ +struct ParameterInitIdentity : public ParameterInit { + /** + * \brief Constructor + */ + ParameterInitIdentity() {} + virtual void initialize_params(Tensor & values) const override; +}; + +/** + * \ingroup params + * \brief Initialize with the methods described in [Glorot, 2010](http://www.jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf?hc_location=ufi) + * \details In order to preserve the variance of the forward and backward flow across layers, the parameters \f$\theta\f$ are initialized such that \f$\mathrm{Var}(\theta)=\frac 2 {n_1+n_2}\f$ where \f$n_1,n_2\f$ are the input and output dim. + * Important note : The underlying distribution is uniform (not gaussian) + * + */ +struct ParameterInitGlorot : public ParameterInit { + /** + * \brief Constructor + * + * \param is_lookup Boolean value identifying the parameter as a LookupParameter + * \param gain Scaling parameter. In order for the Glorot initialization to be correct, you should set this equal to \f$\frac 1 {f'(0)}\f$ where \f$f\f$ is your activation function + */ + ParameterInitGlorot(bool is_lookup = false, float gain = 1.f) : lookup(is_lookup), gain(gain) {} + virtual void initialize_params(Tensor & values) const override; +private: + bool lookup; + float gain; +}; +/** + * \ingroup params + * \brief Initializes according to [Saxe et al., 2014](https://arxiv.org/abs/1312.6120) + * \details Initializes as a random orthogonal matrix (unimplemented for GPU) + */ +struct ParameterInitSaxe : public ParameterInit { + /** + * \brief Constructor + */ + ParameterInitSaxe(float gain = 1.0) : gain(gain) {} + virtual void initialize_params(Tensor & values) const override; +private: + float gain; +}; + +/** + * \ingroup params + * \brief Initializes from a file + * \details Useful for reusing weights, etc...
+ * + */ +struct ParameterInitFromFile : public ParameterInit { + /** + * \brief Constructor + * \param f File name (format should just be a list of values) + */ + ParameterInitFromFile(std::string f) : filename(f) {} + virtual void initialize_params(Tensor & values) const override; +private: + std::string filename; +}; + +/** + * \ingroup params + * \brief Initializes from a `std::vector` of floats + */ +struct ParameterInitFromVector : public ParameterInit { + /** + * \brief Constructor + * + * \param v Vector of values to be used + */ + ParameterInitFromVector(std::vector v) : vec(v) {} + virtual void initialize_params(Tensor & values) const override; +private: + std::vector vec; +}; + + + +// this is a collection of parameters +// if you need a matrix of parameters, or a lookup table - ask an instance of this class +// this knows how to serialize itself +// parameters know how to track their gradients, but any extra information (like velocity) will live here +/** + * \ingroup params + * \brief This is a collection of parameters + * \details if you need a matrix of parameters, or a lookup table - ask an instance of this class. + * This knows how to serialize itself. + * Parameters know how to track their gradients, but any extra information (like velocity) will live here + */ +class Model { +public: + /** + * \brief Constructor + */ + Model(); + ~Model(); + template + float gradient_l2_norm_dev(MyDevice & dev) const; + /** + * \brief Returns the l2 of your gradient + * \details Use this to look for gradient vanishing/exploding + * \return L2 norm of the gradient + */ + float gradient_l2_norm() const; + /** + * \brief Sets all gradients to zero + */ + void reset_gradient(); + // set scale to use custom initialization + /** + * \brief Add parameters to model and returns Parameter object + * \details creates a ParameterStorage object holding a tensor of dimension `d` and returns a Parameter object (to be used as input in the computation graph). 
The coefficients are sampled according to the `scale` parameter + * + * \param d Shape of the parameter + * \param scale If scale is non-zero, initializes according to \f$mathcal U([-\mathrm{scale},+\mathrm{scale}]\f$, otherwise uses Glorot initialization + * + * \return Parameter object to be used in the computation graph + */ + Parameter add_parameters(const Dim& d, float scale = 0.0f); + /** + * \brief Add parameters with custom initializer + * + * \param d Shape of the parameter + * \param init Custom initializer + * + * \return Parameter object to be used in the computation graph + */ + Parameter add_parameters(const Dim& d, const ParameterInit & init); + /** + * \brief Add lookup parameter to model + * \details Same as add_parameters. Initializes with Glorot + * + * \param n Number of lookup indices + * \param d Dimension of each embedding + * + * \return LookupParameter object to be used in the computation graph + */ + LookupParameter add_lookup_parameters(unsigned n, const Dim& d); + /** + * \brief Add lookup parameter with custom initializer + * + * \param n Number of lookup indices + * \param d Dimension of each embedding + * \param init Custom initializer + * \return LookupParameter object to be used in the computation graph + */ + LookupParameter add_lookup_parameters(unsigned n, const Dim& d, const ParameterInit & init); + // + /** + * \brief project weights so their L2 norm = radius + * \details NOTE (Paul) : I am not sure this is doing anything currently. The argument doesn't seem to be used anywhere... 
If you need this raise an issue on github + * + * \param radius Target norm + */ + void project_weights(float radius = 1.0f); + /** + * \brief Set the weight decay coefficient + * + * \param lambda Weight decay coefficient + */ + void set_weight_decay_lambda(float lambda); + + //const std::vector& all_parameters_list() const { return all_params; } + /** + * \brief Returns list of pointers to ParameterSorages + * \details You shouldn't need to use this + * \return List of pointers to ParameterSorages + */ + const std::vector& parameters_list() const { return params; } + /** + * \brief Returns list of pointers to LookupParameterSorages + * \details You shouldn't need to use this + * \return List of pointers to LookupParameterSorages + */ + const std::vector& lookup_parameters_list() const { return lookup_params; } + + // indexes into params and lookup_params + /** + * \brief Returns list of indices of updated params + * + * \return list of indices of updated params + */ + const std::vector& updated_parameters_list() const { return updated_params; } + /** + * \brief Returns list of indices of updated lookup params + * + * \return list of indices of updated lookup params + */ + const std::vector& updated_lookup_parameters_list() const { return updated_lookup_params; } + + // + // + /** + * \brief Returns the total number of tunable parameters (i. e. scalars) contained within this model. + * \details That is to say, a 2x2 matrix counts as four parameters. 
+ * \return Number of parameters + */ + size_t parameter_count() const; + /** + * \brief Returns total number of (scalar) parameters updated + * + * \return number of updated parameters + */ + size_t updated_parameter_count() const; + + /** + * \brief [brief description] + * \details [long description] + * + * \param p [description] + * \param status [description] + */ + void set_updated_param(const Parameter *p, bool status); + /** + * \brief [brief description] + * \details [long description] + * + * \param p [description] + * \param status [description] + */ + void set_updated_lookup_param(const LookupParameter *p, bool status); + /** + * \brief [brief description] + * \details [long description] + * + * \param p [description] + * \return [description] + */ + bool is_updated_param(const Parameter *p); + /** + * \brief [brief description] + * \details [long description] + * + * \param p [description] + * \return [description] + */ + bool is_updated_lookup_param(const LookupParameter *p); + + L2WeightDecay weight_decay; +private: + DYNET_SERIALIZE_DECLARE() + std::vector all_params; + std::vector params; + std::vector lookup_params; + + // these are a subset of the parameters that are used when model is updated. + // kept as indices into params and lookup_params. 
+ std::vector updated_params; + std::vector updated_lookup_params; + + mutable float* gradient_norm_scratch; +}; // class Model + +void save_dynet_model(std::string filename, Model* model); +void load_dynet_model(std::string filename, Model* model); + +} // namespace dynet + +BOOST_CLASS_EXPORT_KEY(dynet::ParameterStorage) +BOOST_CLASS_EXPORT_KEY(dynet::LookupParameterStorage) + +#endif diff --git a/thirdparty/dynet/dynet/mp.cc b/thirdparty/dynet/dynet/mp.cc new file mode 100644 index 000000000..785664e7e --- /dev/null +++ b/thirdparty/dynet/dynet/mp.cc @@ -0,0 +1,88 @@ +#if !_WINDOWS +#include "mp.h" +#include "dynet/except.h" +using namespace std; +using namespace boost::interprocess; + +namespace dynet { + namespace mp { + // TODO: Pass these around instead of having them be global + std::string queue_name = "dynet_mp_work_queue"; + std::string shared_memory_name = "dynet_mp_shared_memory"; + timespec start_time; + bool stop_requested = false; + SharedObject* shared_object = nullptr; + + std::string generate_queue_name() { + std::ostringstream ss; + ss << "dynet_mp_work_queue"; + ss << rand(); + return ss.str(); + } + + std::string generate_shared_memory_name() { + std::ostringstream ss; + ss << "dynet_mp_shared_memory"; + ss << rand(); + return ss.str(); + } + + dynet::real sum_values(const std::vector& values) { + return accumulate(values.begin(), values.end(), 0.0); + } + + dynet::real mean(const std::vector& values) { + return sum_values(values) / values.size(); + } + + std::string elapsed_time_string(const timespec& start, const timespec& end) { + std::ostringstream ss; + time_t secs = end.tv_sec - start.tv_sec; + long nsec = end.tv_nsec - start.tv_nsec; + ss << secs << " seconds and " << nsec << "nseconds"; + return ss.str(); + } + + unsigned spawn_children(std::vector& workloads) { + const unsigned num_children = workloads.size(); + pid_t pid; + unsigned cid; + for (cid = 0; cid < num_children; ++cid) { + pid = fork(); + if (pid == -1) { + std::cerr << 
"Fork failed. Exiting ..." << std::endl; + return 1; + } + else if (pid == 0) { + // children shouldn't continue looping + break; + } + workloads[cid].pid = pid; + } + return cid; + } + + std::vector create_workloads(unsigned num_children) { + int err; + std::vector workloads(num_children); + for (unsigned cid = 0; cid < num_children; cid++) { + err = pipe(workloads[cid].p2c); + if(err != 0) DYNET_RUNTIME_ERR("Problem writing to p2c pipe " << cid << " in create_workloads"); + err = pipe(workloads[cid].c2p); + if(err != 0) DYNET_RUNTIME_ERR("Problem writing to c2p pipe " << cid << " in create_workloads"); + } + return workloads; + } + + void cleanup(const std::vector& workloads) { + for (const Workload& workload : workloads) { + close (workload.c2p[0]); + close (workload.c2p[1]); + close (workload.p2c[0]); + close (workload.p2c[1]); + } + } + + } +} +#endif diff --git a/thirdparty/dynet/dynet/mp.h b/thirdparty/dynet/dynet/mp.h new file mode 100644 index 000000000..535ba00ca --- /dev/null +++ b/thirdparty/dynet/dynet/mp.h @@ -0,0 +1,466 @@ +#pragma once +#if !_WINDOWS +#include "dynet/globals.h" +#include "dynet/dynet.h" +#include "dynet/training.h" +#include "dynet/expr.h" +#include "dynet/dict.h" +#include "dynet/lstm.h" +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace dynet { + namespace mp { + // TODO: Pass these around instead of having them be global + extern std::string queue_name; + extern std::string shared_memory_name; + extern timespec start_time; + extern bool stop_requested; + + struct WorkloadHeader { + bool is_dev_set; + bool end_of_epoch; + unsigned report_frequency; + }; + + // A simple struct to hold information about a child process + // TODO: Rename me! 
+ struct Workload { + pid_t pid; + int c2p[2]; // Child to parent pipe + int p2c[2]; // Parent to child pipe + }; + + // This interface is used by the child processes and called + // once per datum. + template + class ILearner { + public: + virtual ~ILearner() {} + virtual S LearnFromDatum(const D& datum, bool learn) = 0; + virtual void SaveModel() = 0; + }; + + struct SharedObject { + SharedObject() : update_mutex(1), counter_mutex(1), counter(0) {} + boost::interprocess::interprocess_semaphore update_mutex; + boost::interprocess::interprocess_semaphore counter_mutex; + unsigned counter; + }; + extern SharedObject* shared_object; + + /// XXX: We never delete these objects + template + T* get_shared_memory() { + /*std::cerr << "Creating shared memory named " << shared_memory_name << std::endl; + auto shm = new boost::interprocess::shared_memory_object(boost::interprocess::create_only, shared_memory_name.c_str(), boost::interprocess::read_write); + shm->truncate(sizeof(T)); + auto region = new boost::interprocess::mapped_region (*shm, boost::interprocess::read_write);*/ + auto region = new boost::interprocess::mapped_region(boost::interprocess::anonymous_shared_memory(sizeof(T))); + void* addr = region->get_address(); + T* obj = new (addr) SharedObject(); + return obj; + } + + // Some simple functions that do IO to/from pipes. + // These are used to send data from child processes + // to the parent process or vice/versa. 
+ template + T read_data(int pipe) { + T v; + int err = read(pipe, (void*)&v, sizeof(T)); + DYNET_ASSERT(err != -1, "Failed to read data from pipe in multi-processing"); + return v; + } + + template + void write_data(int pipe, const T& v) { + int err = write(pipe, (void*)&v, sizeof(T)); + DYNET_ASSERT(err != -1, "Failed to write data to pipe in multi-processing"); + } + + std::string generate_queue_name(); + std::string generate_shared_memory_name(); + + dynet::real sum_values(const std::vector& values); + dynet::real mean(const std::vector& values); + + std::string elapsed_time_string(const timespec& start, const timespec& end); + + unsigned spawn_children(std::vector& workloads); + std::vector create_workloads(unsigned num_children); + + // Called by the parent to process a chunk of data + template + S run_data_set(std::vector::iterator begin, std::vector::iterator end, const std::vector& workloads, + boost::interprocess::message_queue& mq, const WorkloadHeader& header) { + const unsigned num_children = workloads.size(); + + // Tell all the children to start up + for (unsigned cid = 0; cid < num_children; ++cid) { + bool cont = true; + write_data(workloads[cid].p2c[1], cont); + write_data(workloads[cid].p2c[1], header); + } + + // Write all the indices to the queue for the children to process + for (auto curr = begin; curr != end; ++curr) { + unsigned i = *curr; + mq.send(&i, sizeof(i), 0); + if (stop_requested) { + break; + } + } + + // Send a bunch of stop messages to the children + for (unsigned cid = 0; cid < num_children; ++cid) { + unsigned stop = -1U; + mq.send(&stop, sizeof(stop), (stop_requested ? 
1 : 0)); + } + + // Wait for each child to finish training its load + std::vector losses(num_children); + for(unsigned cid = 0; cid < num_children; ++cid) { + losses[cid] = read_data(workloads[cid].c2p[0]); + } + + S total_loss = S(); + for (S& datum_loss : losses) { + total_loss += datum_loss; + } + return total_loss; + } + + template + void run_parent(const std::vector& train_data, const std::vector& dev_data, ILearner* learner, + std::vector& workloads, unsigned num_iterations, unsigned dev_frequency, unsigned report_frequency) { + const unsigned num_children = workloads.size(); + boost::interprocess::message_queue mq(boost::interprocess::open_or_create, queue_name.c_str(), 10000, sizeof(unsigned)); + std::vector train_indices(train_data.size()); + std::iota(train_indices.begin(), train_indices.end(), 0); + + std::vector dev_indices(dev_data.size()); + std::iota(dev_indices.begin(), dev_indices.end(), 0); + + S best_dev_loss = S(); + bool first_dev_run = true; + for (unsigned iter = 0; iter < num_iterations && !stop_requested; ++iter) { + // Shuffle the training data indices + std::shuffle(train_indices.begin(), train_indices.end(), *rndeng); + + S train_loss = S(); + + std::vector::iterator begin = train_indices.begin(); + while (begin != train_indices.end()) { + std::vector::iterator end = begin + dev_frequency; + if (end > train_indices.end()) { + end = train_indices.end(); + } + + + std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); + double fractional_iter = iter + 1.0 * distance(train_indices.begin(), end) / train_indices.size(); + S batch_loss = run_data_set(begin, end, workloads, mq, {false, end == train_indices.end(), report_frequency}); + std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); + double seconds_elapsed = std::chrono::duration_cast(end_time - start_time).count() / 1000000.0; + train_loss += batch_loss; + std::cerr << fractional_iter << "\t" << "loss = " << batch_loss << " (" << 
seconds_elapsed << "s)" << std::endl; + + if (stop_requested) { + break; + } + + S dev_loss = run_data_set(dev_indices.begin(), dev_indices.end(), workloads, mq, {true, false, report_frequency}); + bool new_best = (first_dev_run || dev_loss < best_dev_loss); + first_dev_run = false; + std::cerr << fractional_iter << "\t" << "dev loss = " << dev_loss << (new_best ? " (New best!)" : "") << std::endl; + if (stop_requested) { + break; + } + if (new_best) { + learner->SaveModel(); + best_dev_loss = dev_loss; + } + + begin = end; + } + } + + // Kill all children one by one and wait for them to exit + for (unsigned cid = 0; cid < num_children; ++cid) { + bool cont = false; + write_data(workloads[cid].p2c[1], cont); + wait(NULL); + } + } + + template + int run_child(unsigned cid, ILearner* learner, Trainer* trainer, + std::vector& workloads, const std::vector& train_data, + const std::vector& dev_data) { + const unsigned num_children = workloads.size(); + DYNET_ASSERT(cid >= 0 && cid < num_children, "Bad child ID " << cid << " in run_child()"); + unsigned i; + unsigned priority; + boost::interprocess::message_queue::size_type recvd_size; + boost::interprocess::message_queue mq(boost::interprocess::open_or_create, queue_name.c_str(), 10000, sizeof(unsigned)); + while (true) { + // Check if the parent wants us to exit + bool cont = read_data(workloads[cid].p2c[0]); + if (cont == 0) { + break; + } + + // Check if we're running on the training data or the dev data + WorkloadHeader header = read_data(workloads[cid].p2c[0]); + + // Run the actual training loop + S total_loss = S(); + S batch_loss = S(); + unsigned batch_counter = 0; + while (true) { + mq.receive(&i, sizeof(unsigned), recvd_size, priority); + if (i == -1U) { + break; + } + + DYNET_ASSERT(i < (header.is_dev_set ? dev_data.size() : train_data.size()), "Out-of-bounds ID in MP dev/train set"); + const D& datum = (header.is_dev_set ? 
dev_data[i] : train_data[i]); + S datum_loss = learner->LearnFromDatum(datum, !header.is_dev_set); + total_loss += datum_loss; + batch_loss += datum_loss; + batch_counter++; + + bool do_update = !header.is_dev_set && cid == 0; + unsigned counter = 0; + if (!header.is_dev_set) { + shared_object->counter_mutex.wait(); + counter = ++shared_object->counter; + if (do_update) { shared_object->counter = 0; } + shared_object->counter_mutex.post(); + } + if (do_update && trainer != nullptr) { + shared_object->update_mutex.wait(); + trainer->update(1.0 / counter); + shared_object->update_mutex.post(); + } + if (batch_counter == header.report_frequency) { + if (cid == 0) { + std::cerr << (header.is_dev_set ? "dev" : "train") << " loss: " << batch_loss << std::endl; + } + batch_loss = S(); + batch_counter = 0; + } + } + if (header.end_of_epoch && trainer != nullptr) { + trainer->update_epoch(); + } + + // Let the parent know that we're done and return the loss value + write_data(workloads[cid].c2p[1], total_loss); + } + return 0; + } + + template + void run_multi_process(unsigned num_children, ILearner* learner, Trainer* trainer, const std::vector& train_data, + const std::vector& dev_data, unsigned num_iterations, unsigned dev_frequency, unsigned report_frequency) { + queue_name = generate_queue_name(); + boost::interprocess::message_queue::remove(queue_name.c_str()); + boost::interprocess::message_queue::remove(queue_name.c_str()); + shared_memory_name = generate_shared_memory_name(); + shared_object = get_shared_memory(); + std::vector workloads = create_workloads(num_children); + unsigned cid = spawn_children(workloads); + if (cid < num_children) { + run_child(cid, learner, trainer, workloads, train_data, dev_data); + exit(0); + } + else { + run_parent(train_data, dev_data, learner, workloads, num_iterations, dev_frequency, report_frequency); + } + } + + template + void run_single_process(ILearner* learner, Trainer* trainer, const std::vector& train_data, + const 
std::vector& dev_data, unsigned num_iterations, unsigned dev_frequency, unsigned report_frequency, unsigned batch_size) { + std::vector train_indices(train_data.size()); + std::iota(train_indices.begin(), train_indices.end(), 0); + + std::vector dev_indices(dev_data.size()); + std::iota(dev_indices.begin(), dev_indices.end(), 0); + + S best_dev_loss = S(); + bool first_dev_run = true; + unsigned batch_counter = 0; + for (unsigned iter = 0; iter < num_iterations && !stop_requested; ++iter) { + // Shuffle the training data indices + std::shuffle(train_indices.begin(), train_indices.end(), *rndeng); + + S train_loss = S(); + + unsigned data_processed = 0; + unsigned data_until_report = report_frequency; + std::vector::iterator begin = train_indices.begin(); + while (begin != train_indices.end()) { + std::vector::iterator end = begin + dev_frequency; + if (end > train_indices.end()) { + end = train_indices.end(); + } + S batch_loss; + for (auto it = begin; it != end; ++it) { + unsigned i = *it; + DYNET_ASSERT(i < train_data.size(), "Out-of-bounds ID in train set for multiprocessing"); + const D& datum = train_data[i]; + S datum_loss = learner->LearnFromDatum(datum, true); + batch_loss += datum_loss; + train_loss += datum_loss; + if (++batch_counter == batch_size) { + trainer->update(1.0 / batch_size); + batch_counter = 0; + } + data_processed++; + + if (--data_until_report == 0) { + data_until_report = report_frequency; + double fractional_iter = iter + 1.0 * data_processed / train_indices.size(); + std::cerr << fractional_iter << "\t" << "loss = " << batch_loss << std::endl; + batch_loss = S(); + } + } + + if (stop_requested) { + break; + } + + S dev_loss; + for (auto it = dev_indices.begin(); it != dev_indices.end(); ++it) { + unsigned i = *it; + DYNET_ASSERT(i < dev_data.size(), "Out-of-bounds ID in dev set for multiprocessing"); + const D& datum = dev_data[i]; + S datum_loss = learner->LearnFromDatum(datum, false); + dev_loss += datum_loss; + } + bool new_best = 
(first_dev_run || dev_loss < best_dev_loss); + first_dev_run = false; + double fractional_iter = iter + 1.0 * data_processed / train_indices.size(); + std::cerr << fractional_iter << "\t" << "dev loss = " << dev_loss << (new_best ? " (New best!)" : "") << std::endl; + if (stop_requested) { + break; + } + trainer->update_epoch(); + if (new_best) { + learner->SaveModel(); + best_dev_loss = dev_loss; + } + + begin = end; + } + } + } + + void cleanup(const std::vector& workloads); + + template + S run_simple_parent(const std::vector& train_data, ILearner* learner, std::vector& workloads) { + const unsigned num_children = workloads.size(); + boost::interprocess::message_queue mq(boost::interprocess::open_or_create, queue_name.c_str(), 10000, sizeof(unsigned)); + std::vector train_indices(train_data.size()); + std::iota(train_indices.begin(), train_indices.end(), 0); + + S train_loss = S(); + + std::vector::iterator begin = train_indices.begin(); + std::vector::iterator end = train_indices.end(); + S batch_loss = run_data_set(begin, end, workloads, mq, {false, true, (unsigned)-1}); + train_loss += batch_loss; + + // Kill all children one by one and wait for them to exit + for (unsigned cid = 0; cid < num_children; ++cid) { + bool cont = false; + write_data(workloads[cid].p2c[1], cont); + wait(NULL); + } + + return train_loss; + } + + template + S run_mp_minibatch(unsigned num_children, ILearner* learner, const std::vector& data) { + queue_name = generate_queue_name(); + boost::interprocess::message_queue::remove(queue_name.c_str()); + boost::interprocess::message_queue::remove(queue_name.c_str()); + shared_memory_name = generate_shared_memory_name(); + shared_object = get_shared_memory(); + std::vector workloads = create_workloads(num_children); + std::vector dev_data; + Trainer* trainer = nullptr; + unsigned cid = spawn_children(workloads); + if (cid < num_children) { + run_child(cid, learner, trainer, workloads, data, dev_data); + exit(0); + } + else { + S return_value 
= run_simple_parent(data, learner, workloads); + cleanup(workloads); + return return_value; + } + } + + template + S run_sp_minibatch_trainer(ILearner* learner, Trainer* inputTrainer, const std::vector& data) { + Trainer* trainer = inputTrainer; + S total_loss; + for (unsigned i = 0; i < data.size(); ++i) { + const D& datum = data[i]; + S datum_loss = learner->LearnFromDatum(datum, (trainer != nullptr)); + total_loss += datum_loss; + + if (trainer != nullptr) { + trainer->update(); + } + } + return total_loss; + } + + template + S run_mp_minibatch_trainer(unsigned num_children, ILearner* learner, Trainer* inputTrainer, const std::vector& data) { + queue_name = generate_queue_name(); + boost::interprocess::message_queue::remove(queue_name.c_str()); + boost::interprocess::message_queue::remove(queue_name.c_str()); + shared_memory_name = generate_shared_memory_name(); + shared_object = get_shared_memory(); + std::vector workloads = create_workloads(num_children); + std::vector dev_data; + Trainer* trainer = inputTrainer; + unsigned cid = spawn_children(workloads); + if (cid < num_children) { + run_child(cid, learner, trainer, workloads, data, dev_data); + exit(0); + } + else { + S return_value = run_simple_parent(data, learner, workloads); + cleanup(workloads); + return return_value; + } + } + } +} +#endif // !_WINDOWS diff --git a/thirdparty/dynet/dynet/nodes-common.cc b/thirdparty/dynet/dynet/nodes-common.cc new file mode 100644 index 000000000..df300b42a --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-common.cc @@ -0,0 +1,1139 @@ +#include "dynet/nodes.h" + +#include +#include +#include + +#include "dynet/nodes-macros.h" +#include "dynet/globals.h" + +using namespace std; + +namespace dynet { + +string AddVectorToAllColumns::as_string(const vector& arg_names) const { + ostringstream os; + os << "colwise_add(" << arg_names[0] << ", " << arg_names[1] << ')'; + return os.str(); +} + +Dim AddVectorToAllColumns::dim_forward(const vector& xs) const { + 
DYNET_ARG_CHECK(xs.size() == 2 && + xs[0].rows() == xs[1].rows() && + xs[0].ndims() == 2 && + (xs[1].ndims() == 1 || (xs[1].ndims() == 2 && xs[1].cols() == 1)), + "Bad input dimensions in AddVectorToAllColumns: " << xs); + return Dim({xs[0][0], xs[0][1]}, max(xs[0].bd,xs[1].bd)); +} + +string SparsemaxLoss::as_string(const vector& arg_names) const { + ostringstream s; + s << "sparsemax(" << arg_names[0] << ", q)"; + return s.str(); +} + +Dim SparsemaxLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in SparsemaxLoss: " << xs); + return Dim({1}); +} + +string Sparsemax::as_string(const vector& arg_names) const { + ostringstream s; + s << "sparsemax(" << arg_names[0] << ")"; + return s.str(); +} + +Dim Sparsemax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in Sparsemax: " << xs); + return xs[0]; +} + +string MatrixInverse::as_string(const vector& arg_names) const { + ostringstream s; + s << "inverse(" << arg_names[0] << ")"; + return s.str(); +} + +Dim MatrixInverse::dim_forward(const vector& xs) const { + return xs[0]; +} + +string LogDet::as_string(const vector& arg_names) const { + ostringstream s; + s << "logdet(" << arg_names[0] << ")"; + return s.str(); +} + +Dim LogDet::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs[0].ndims() <= 2 && (xs[0].rows() == xs[0].cols()), "Bad arguments in LogDet: " << xs); + return Dim({1}); +} + +string SelectRows::as_string(const vector& arg_names) const { + ostringstream s; + s << "select_rows(" << arg_names[0] << ", {rsize=" << prows->size() << "})"; + return s.str(); +} + +Dim SelectRows::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && xs[0].ndims() == 2, "Bad arguments in SelectRows: " << xs); + unsigned nrows = prows->size(); + if (xs[0].ndims() == 1) return Dim({nrows}); + return Dim({nrows, xs[0].cols()}); +} + +string 
SelectCols::as_string(const vector& arg_names) const { + ostringstream s; + s << "select_cols(" << arg_names[0] << ", {csize=" << pcols->size() << "})"; + return s.str(); +} + +Dim SelectCols::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && xs[0].ndims() == 2, "Bad arguments in SelectCols: " << xs); + unsigned ncols = pcols->size(); + return Dim({xs[0].rows(), ncols}); +} + +string Min::as_string(const vector& arg_names) const { + ostringstream s; + s << "min{" << arg_names[0] << ", " << arg_names[1] << "}"; + return s.str(); +} + +Dim Min::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in Min: " << xs); + return xs[0].bd >= xs[1].bd ? xs[0] : xs[1]; +} + +string Max::as_string(const vector& arg_names) const { + ostringstream s; + s << "max{" << arg_names[0] << ", " << arg_names[1] << "}"; + return s.str(); +} + +Dim Max::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in Max: " << xs); + return xs[0].bd >= xs[1].bd ? xs[0] : xs[1]; +} + +string TraceOfProduct::as_string(const vector& arg_names) const { + ostringstream s; + s << "Tr(" << arg_names[0] << " * " << arg_names[1] << "^T)"; + return s.str(); +} + +Dim TraceOfProduct::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in TraceOfProduct: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +string ConstScalarMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " * " << alpha; + return s.str(); +} + +Dim ConstScalarMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "ConstScalarMultiply expects one argument: " << xs); + return xs[0]; +} + +string DotProduct::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << "^T . 
" << arg_names[1]; + return s.str(); +} + +Dim DotProduct::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && + xs[0].single_batch() == xs[1].single_batch(), + "Bad arguments to DotProduct: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +string Transpose::as_string(const vector& arg_names) const { + ostringstream s; + s << "transpose("<< arg_names[0] << ", "; + for(size_t i = 0; i < dims.size(); ++i) + s << (i == 0?'{':',') << dims[i]; + s << "})"; + return s.str(); +} + +Dim Transpose::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Bad arguments to Transpose: " << xs); + DYNET_ARG_CHECK(xs[0].nd == dims.size() || xs[0].num_nonone_dims() == 1, "Dimensions passed to transpose (" << dims.size() << ") must be equal to dimensions in input tensor (" << xs[0].nd << ')'); + Dim ret(xs[0]); + ret.nd = dims.size(); + for(size_t i = 0; i < dims.size(); ++i) + ret.d[i] = xs[0][dims[i]]; + return ret; +} + +string Reshape::as_string(const vector& arg_names) const { + ostringstream s; + s << "reshape(" << arg_names[0] << " --> " << to << ')'; + return s.str(); +} + +Dim Reshape::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Reshape") + if(to.size() == xs[0].size()) { + return to; + } else { + DYNET_ARG_CHECK(to.batch_elems() == 1 && to.batch_size() == xs[0].batch_size(), + "Bad arguments to Reshape: " << to << ", " << xs[0]); + Dim ret(to); + ret.bd = xs[0].batch_elems(); + return ret; + } +} + +string KMHNGram::as_string(const vector& arg_names) const { + ostringstream s; + s << "kmh-ngram(" << arg_names[0] << ')'; + return s.str(); +} + +Dim KMHNGram::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs[0].ndims() == 2, "Bad input dimensions in KMHNGram: " << xs); + const unsigned new_cols = xs[0].cols() - n + 1; + DYNET_ARG_CHECK(new_cols >= 1, "Bad input dimensions in KMHNGram: " << xs); + return Dim({xs[0][0], new_cols}); +} + +string 
GaussianNoise::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " + N(0," << stddev << ')'; + return s.str(); +} + +Dim GaussianNoise::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in GaussianNoise") + return xs[0]; +} + +string Dropout::as_string(const vector& arg_names) const { + ostringstream s; + s << "dropout(" << arg_names[0] << ",p=" << p << ')'; + return s.str(); +} + +Dim Dropout::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Dropout") + return xs[0]; +} + +string DropoutBatch::as_string(const vector& arg_names) const { + ostringstream s; + s << "dropout_batch(" << arg_names[0] << ",p=" << p << ')'; + return s.str(); +} + +Dim DropoutBatch::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in DropoutBatch") + return xs[0]; +} + +string DropoutDim::as_string(const vector& arg_names) const { + ostringstream s; + s << "dropout_dim(" << arg_names[0] << ",p=" << p << ')'; + return s.str(); +} + +Dim DropoutDim::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in DropoutDim") + DYNET_ARG_CHECK(xs[0].nd < 4, "DropoutDim only supports tensor up to order 3 + batch dimension, got tensor of order"< dimension, "In DropoutDim : tried to drop along dimension "<& arg_names) const { + ostringstream s; + s << "block_dropout(" << arg_names[0] << ",dropout_probability=" << dropout_probability << ')'; + return s.str(); +} + +Dim BlockDropout::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in BlockDropout") + return xs[0]; +} + +string ConstantPlusX::as_string(const vector& arg_names) const { + ostringstream s; + s << c << " + " << arg_names[0]; + return s.str(); +} + +Dim ConstantPlusX::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in 
ConstantPlusX") + return xs[0]; +} + +string ConstantMinusX::as_string(const vector& arg_names) const { + ostringstream s; + s << c << " - " << arg_names[0]; + return s.str(); +} + +Dim ConstantMinusX::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ConstantMinusX") + return xs[0]; +} + +string LogSumExp::as_string(const vector& arg_names) const { + ostringstream s; + s << "log(exp " << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) + s << " + exp " << arg_names[i]; + s << ")"; + return s.str(); +} + +Dim LogSumExp::dim_forward(const vector& xs) const { + Dim d = xs[0].truncate(); + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(d.single_batch() == xs[i].truncate().single_batch(), + "Mismatched input dimensions in LogSumExp: " << xs); + d.bd = max(xs[i].bd, d.bd); + } + return d; +} +string Sum::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) + s << " + " << arg_names[i]; + return s.str(); +} + +Dim Sum::dim_forward(const vector& xs) const { + Dim d = xs[0].truncate(); + unsigned int batch = d.bd; + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(d.single_batch() == xs[i].truncate().single_batch(), + "Mismatched input dimensions in Sum: " << xs); + batch = max(xs[i].bd, batch); + } + d = xs[0]; d.bd = batch; + return d; +} + +string SumElements::as_string(const vector& arg_names) const { + ostringstream s; + s << "sum_elems( " << arg_names[0] << " )"; + return s.str(); +} + +Dim SumElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SumElements") + return Dim({1}, xs[0].bd); +} + +string SumBatches::as_string(const vector& arg_names) const { + ostringstream s; + s << "sum_batches( " << arg_names[0] << " )"; + return s.str(); +} + +Dim SumBatches::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed 
input count check in SumBatches") + return xs[0].single_batch(); +} + +string MomentElements::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_elems( expression=" << arg_names[0] << ", order=" << order << " )"; + return s.str(); +} + +Dim MomentElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MomentElements") + DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentElements (recieved "<& arg_names) const { + ostringstream s; + s << "moment_batches( expression=" << arg_names[0] << ", order=" << order << " )"; + return s.str(); +} + +Dim MomentBatches::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MomentBatches") + DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentBatches (recieved "<& arg_names) const { + ostringstream s; + s << "std_elems( expression=" << arg_names[0] << " )"; + return s.str(); +} + +Dim StdElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in StdElements") + return Dim({1}, xs[0].bd); +} + +string StdBatches::as_string(const vector& arg_names) const { + ostringstream s; + s << "std_batches( expression=" << arg_names[0] << " )"; + return s.str(); +} + +Dim StdBatches::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in StdBatches") + + return xs[0].single_batch(); +} + +string StdDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_dim(expression=" << arg_names[0] << ',' << dimension <<'}'; + return s.str(); +} + +Dim StdDimension::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in StdDimension"); + DYNET_ARG_CHECK(xs[0].nd <= 3, "StdDimension implemented up to tensors of order 3 (with minibatch) for now") + DYNET_ARG_CHECK(dimension < xs[0].nd, "dimension " << dimension << " is out of bounds 
of tensor of order " << xs[0].nd << " in StdDimension" ) + Dim ret(xs[0]); + ret.delete_dim(dimension); + return ret; +} + +string MomentDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_dim(expression=" << arg_names[0] << ',' << dimension << ", order="<& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in MomentDimension"); + DYNET_ARG_CHECK(xs[0].nd <= 3, "MomentDimension implemented up to tensors of order 3 (with minibatch) for now") + DYNET_ARG_CHECK(dimension < xs[0].nd, "dimension " << dimension << " is out of bounds of tensor of order " << xs[0].nd << " in MomentDimension" ) + DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentDimension (recieved "<& arg_names) const { + ostringstream s; + s << "average(" << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) + s << ", " << arg_names[i]; + s << ")"; + return s.str(); +} + +Dim Average::dim_forward(const vector& xs) const { + Dim d(xs[0]); + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(xs[0].single_batch() == xs[i].single_batch(), + "Mismatched input dimensions in Average: " << xs); + d.bd = max(xs[i].bd, d.bd); + } + return d; +} + +string Sqrt::as_string(const vector& arg_names) const { + ostringstream s; + s << "sqrt(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Sqrt::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Sqrt") + return xs[0]; +} + +string Abs::as_string(const vector& arg_names) const { + ostringstream s; + s << "abs(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Abs::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Abs") + return xs[0]; +} + + +string Erf::as_string(const vector& arg_names) const { + ostringstream s; + s << "erf(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Erf::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count 
check in Erf") + return xs[0]; +} + +string Tanh::as_string(const vector& arg_names) const { + ostringstream s; + s << "tanh(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Tanh::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Tanh") + return xs[0]; +} + +string Square::as_string(const vector& arg_names) const { + ostringstream s; + s << "square(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Square::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Square") + return xs[0]; +} + +string Cube::as_string(const vector& arg_names) const { + ostringstream s; + s << "cube(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Cube::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Cube") + return xs[0]; +} + +string Exp::as_string(const vector& arg_names) const { + ostringstream os; + os << "exp(" << arg_names[0] << ')'; + return os.str(); +} + +Dim Exp::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Exp") + return xs[0]; +} + +string LogGamma::as_string(const vector& arg_names) const { + ostringstream os; + os << "lgamma(" << arg_names[0] << ')'; + return os.str(); +} + +Dim LogGamma::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in LogGamma") + return xs[0]; +} + +string Log::as_string(const vector& arg_names) const { + ostringstream os; + os << "log(" << arg_names[0] << ')'; + return os.str(); +} + +Dim Log::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Log") + return xs[0]; +} + +string Concatenate::as_string(const vector& arg_names) const { + ostringstream os; + os << "concat({" << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) { + os << ',' << arg_names[i]; + } + os << "}, " << dimension << ')'; + return os.str(); +} + +Dim 
Concatenate::dim_forward(const vector& xs) const { + unsigned new_rows = 0; + Dim dr = xs[0]; + for (auto c : xs) { + if(dr.nd < c.nd) dr.resize(c.nd); + if(c.nd < dr.nd) c.resize(dr.nd); + new_rows += c[dimension]; + dr.set(dimension, c[dimension]); + DYNET_ARG_CHECK(dr.single_batch() == c.single_batch(), + "Bad input dimensions in Concatenate: " << xs); + dr.bd = max(dr.bd, c.bd); + } + dr.nd = max(xs[0].nd, dimension+1); + dr.set(dimension, new_rows); + return dr; +} + +string ConcatenateToBatch::as_string(const vector& arg_names) const { + ostringstream os; + os << "concat_batch_elems(" << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) { + os << ',' << arg_names[i]; + } + os << ')'; + return os.str(); +} + +Dim ConcatenateToBatch::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() > 0, "Failed input count check in ConcatenateToBatch") + Dim d(xs[0]); + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(xs[0].single_batch() == xs[i].single_batch(), + "Mismatched input dimensions in ConcatenateToBatch: " << xs); + d.bd += xs[i].bd; + } + return d; +} + +string PairwiseRankLoss::as_string(const vector& arg_names) const { + ostringstream os; + os << "max(0, " << margin << " - " << arg_names[0] << " + " << arg_names[1] << ')'; + return os.str(); +} + +Dim PairwiseRankLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && + xs[0] == xs[1] && + xs[0].rows() == 1 && + (xs[0].ndims() == 1 || xs[0].ndims() == 2), + "Bad input dimensions in PairwiseRankLoss: " << xs); + return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; +} + +string Hinge::as_string(const vector& arg_names) const { + ostringstream os; + os << "hinge(" << arg_names[0] << ", pe=" << pelement << ", m=" << margin << ')'; + return os.str(); +} + +Dim Hinge::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in Hinge: " << xs); + return Dim({1}, xs[0].bd); +} + +string Identity::as_string(const vector& arg_names) const { + return arg_names[0]; +} + +Dim Identity::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Identity") + return xs[0]; +} + +string NoBackprop::as_string(const vector& arg_names) const { + ostringstream s; + s << "nobackprop(" << arg_names[0] << ')'; + return s.str(); +} + +Dim NoBackprop::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in NoBackprop") + return xs[0]; +} + +string FlipGradient::as_string(const vector& arg_names) const { + ostringstream s; + s << "flip_gradient(" << arg_names[0] << ')'; + return s.str(); +} + +Dim FlipGradient::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in FlipGradient"); + return xs[0]; +} + +string Softmax::as_string(const vector& arg_names) const { + ostringstream s; + s << "softmax(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Softmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Softmax"); + DYNET_ARG_CHECK(xs[0].nd <= 2, "Bad input dimensions in Softmax, must be 2 or fewer: " << xs); + return xs[0]; +} + +string SoftSign::as_string(const vector& arg_names) const { + ostringstream s; + s << "softsign(" << arg_names[0] << ')'; + return s.str(); +} + +Dim SoftSign::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SoftSign"); + DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in SoftSign: " << xs); + 
return xs[0]; +} + +string PickNegLogSoftmax::as_string(const vector& arg_names) const { + ostringstream s; + if(pval) { + s << "log_softmax(" << arg_names[0] << ")_{" << *pval << '}'; + } else { + s << "log_softmax(" << arg_names[0] << ")_{"; + string sep = ""; + for(auto v : *pvals) { s << sep << v; sep = ","; } + s << '}'; + } + return s.str(); +} + +Dim PickNegLogSoftmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickNegLogSoftmax"); + DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in PickNegLogSoftmax: " << xs); + DYNET_ARG_CHECK((pval == nullptr || xs[0].bd == 1), + "PickNegLogSoftmax was called with a single ID (" << *pval << + "), but the expression under consideration had multiple mini-batch elements (" << + xs[0].bd << "). A vector of IDs of size " << xs[0].bd << " must be passed instead."); + DYNET_ARG_CHECK((pvals == nullptr || xs[0].bd == pvals->size()), + "The number of IDs passed to PickNegLogSoftmax (" << pvals->size() << + "), did not match the number of mini-batch elements in the expression under consideration (" << + xs[0].bd << "). 
These numbers must match."); + return Dim({1}, xs[0].bd); +} + +string LogSoftmax::as_string(const vector& arg_names) const { + ostringstream s; + s << "log_softmax(" << arg_names[0] << ')'; + return s.str(); +} + +Dim LogSoftmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in LogSoftmax") + DYNET_ARG_CHECK(xs[0].nd <= 2, "Bad input dimensions in LogSoftmax, must be 2 or fewer: " << xs); + return xs[0]; +} + +string RestrictedLogSoftmax::as_string(const vector& arg_names) const { + ostringstream s; + s << "r_log_softmax(" << arg_names[0] << ')'; + return s.str(); +} + +Dim RestrictedLogSoftmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in RestrictedLogSoftmax") + DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in RestrictedLogSoftmax: " << xs); + return xs[0]; +} + +string PickElement::as_string(const vector& arg_names) const { + ostringstream s; + s << "pick(" << arg_names[0] << ','; + if(pval) { + s << *pval; + } else { + DYNET_ASSERT(pvals, "Have neither index nor index vector in PickElement"); + s << '['; + if(pvals->size()) { + s << (*pvals)[0]; + for(size_t i = 1; i < pvals->size(); ++i) + s << ',' << (*pvals)[i]; + } + s << "]"; + } + s << ", " << dimension << ")"; + return s.str(); +} + +Dim PickElement::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickElement"); + DYNET_ARG_CHECK(dimension < xs[0].nd, + "Tried to PickElement on dimension " << dimension << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "PickElement not currently supported for tensors of 4 or more dimensions."); + + Dim ret(xs[0]); + if (pvals){ + DYNET_ARG_CHECK(xs[0].bd == 1 || xs[0].bd == pvals->size(), + "Number of elements in the passed-in index vector (" << pvals->size() << ")" + " did not match number of elements in mini-batch elements in expression (of dimension " << xs[0].bd << ") in 
PickElement"); + ret.bd = pvals->size(); + } + + ret.delete_dim(dimension); + return ret; +} + +// x_1 is a vector +// y = (x_1)[start:end] +string PickRange::as_string(const vector& arg_names) const { + ostringstream s; + s << "slice(" << arg_names[0] << ',' << start << ':' << end << ", dim=" << dim << ')'; + return s.str(); +} + +Dim PickRange::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickRange"); + DYNET_ARG_CHECK(dim < xs[0].nd && start < end && xs[0][dim] >= end, + "Bad input dimensions or range in PickRange: " << xs << " range(" << start << ", " << end << ") with dim=" << dim); + Dim ret = xs[0]; ret.d[dim] = end-start; + return ret; +} + +string PickBatchElements::as_string(const vector& arg_names) const { + ostringstream s; + s << "pick_batch_elems(" << arg_names[0] << ','; + if (pval) { + s << *pval; + } else { + DYNET_ASSERT(pvals, "Have neither index nor index vector in PickBatchElements"); + s << '['; + if (pvals->size()) { + s << (*pvals)[0]; + for (size_t i = 1; i < pvals->size(); ++i) + s << ',' << (*pvals)[i]; + } + s << "]"; + } + s << ")"; + return s.str(); +} + +Dim PickBatchElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickBatchElements") + DYNET_ARG_CHECK(xs[0].nd < 4, "PickElement not currently supported for tensors of 4 or more dimensions."); + Dim ret(xs[0]); + if (pval) { + // set batch size to one. 
+ ret.bd = 1; + } else { + DYNET_ASSERT(pvals, "Have neither index nor index vector in PickBatchElements"); + ret.bd = pvals->size(); + } + return ret; +} + +string MatrixMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " * " << arg_names[1]; + return s.str(); +} + +Dim MatrixMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in MatrixMultiply") + DYNET_ARG_CHECK(xs[0].cols() == xs[1].rows(), "Mismatched input dimensions in MatrixMultiply: " << xs); + if (xs[1].ndims() == 1) return Dim({xs[0].rows()}, max(xs[0].bd, xs[1].bd)); + return Dim({xs[0].rows(), xs[1].cols()}, max(xs[0].bd, xs[1].bd)); +} + +string CwiseMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " \\cdot " << arg_names[1]; + return s.str(); +} + +Dim CwiseMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in CwiseMultiply") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(d.single_batch() == xs[1].truncate().single_batch(), + "Mismatched input dimensions in CwiseMultiply: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +string ScalarAdd::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " + " << arg_names[1]; + return s.str(); +} + +Dim ScalarAdd::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarAdd") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(xs[1].batch_size() == 1, + "Mismatched input dimensions in ScalarAdd: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +string ScalarMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " \\cdot " << arg_names[1]; + return s.str(); +} + +Dim ScalarMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarMultiply") + Dim d = xs[1]; + 
DYNET_ARG_CHECK(xs[0].batch_size() == 1, + "Mismatched input dimensions in ScalarMultiply: " << xs); + d.bd = max(xs[0].bd, d.bd); + return d; +} + +string ScalarQuotient::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " / " << arg_names[1]; + return s.str(); +} + +Dim ScalarQuotient::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarQuotient") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(xs[1].batch_size() == 1, + "Mismatched input dimensions in ScalarQuotient: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + + +string Pow::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " ** " << arg_names[1]; + return s.str(); +} + +Dim Pow::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in Pow") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(xs[1].truncate().single_batch().size() == 1, "Bad input dimensions in Pow: " << xs); + return d; +} + +string CwiseQuotient::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " / " << arg_names[1]; + return s.str(); +} + +Dim CwiseQuotient::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in CwiseQuotient") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(d.single_batch() == xs[1].truncate().single_batch(), "Bad input dimensions in CwiseQuotient: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +string AffineTransform::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); i += 2) + s << " + " << arg_names[i] << " * " << arg_names[i+1]; + return s.str(); +} + +Dim AffineTransform::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK((xs.size() - 1) % 2 == 0, "Bad number of inputs in AffineTransform: " << xs); + if(xs.size() == 1) return xs[0]; + DYNET_ARG_CHECK(xs[0].rows() == xs[1].rows() 
&& xs[1].cols() == xs[2].rows(), + "Bad dimensions for AffineTransform: " << xs); + Dim d = (xs[2].cols() != 1 ? + Dim({xs[0].rows(), xs[2].cols()}, max(max(xs[0].bd, xs[1].bd), xs[2].bd)) : + Dim({xs[0].rows()}, max(max(xs[0].bd, xs[1].bd), xs[2].bd))); + for (unsigned i = 3; i < xs.size(); i += 2) { + DYNET_ARG_CHECK(xs[i].cols() == xs[i+1].rows() && d.rows() == xs[i].rows() && d.cols() == xs[i+1].cols(), + "Bad dimensions for AffineTransform: " << xs); + d.bd = max(max(d.bd, xs[i].bd), xs[i+1].bd); + } + return d; +} + +string Negate::as_string(const vector& arg_names) const { + ostringstream s; + s << '-' << arg_names[0]; + return s.str(); +} + +Dim Negate::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Negate"); + return xs[0]; +} + +string Rectify::as_string(const vector& arg_names) const { + ostringstream s; + s << "ReLU(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Rectify::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Rectify"); + return xs[0]; +} + +string HuberDistance::as_string(const vector& arg_names) const { + ostringstream s; + s << "|| " << arg_names[0] << " - " << arg_names[1] << " ||_H(" << d << ')'; + return s.str(); +} + +Dim HuberDistance::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in HuberDistance"); + DYNET_ARG_CHECK(xs[0].single_batch() == xs[1].single_batch() || + (LooksLikeVector(xs[0]) && LooksLikeVector(xs[1]) && xs[0].batch_size() == xs[1].batch_size()), + "Mismatched input dimensions in HuberDistance: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +string L1Distance::as_string(const vector& arg_names) const { + ostringstream s; + s << "|| " << arg_names[0] << " - " << arg_names[1] << " ||_1"; + return s.str(); +} + +Dim L1Distance::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in L1Distance") + 
DYNET_ARG_CHECK(xs[0].single_batch() == xs[1].single_batch() || + (LooksLikeVector(xs[0]) && LooksLikeVector(xs[1]) && xs[0].batch_size() == xs[1].batch_size()), + "Mismatched input dimensions in L1Distance: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +string PoissonRegressionLoss::as_string(const vector& arg_names) const { + ostringstream s; + s << "-log Poisson(" << pty << "; lambda=\\exp" << arg_names[0] << ')'; + return s.str(); +} + +Dim PoissonRegressionLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && xs[0].size() == 1, "Bad input dimensions in PoissonRegressionLoss: " << xs); + return xs[0]; +} + +string SquaredNorm::as_string(const vector& arg_names) const { + ostringstream s; + s << "|| " << arg_names[0] << " ||^2"; + return s.str(); +} + +Dim SquaredNorm::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SquaredNorm") + return Dim({1}, xs[0].bd); +} + +string L2Norm::as_string(const vector& arg_names) const { + ostringstream s; + s << "|| " << arg_names[0] << " ||"; + return s.str(); +} + +Dim L2Norm::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in L2Norm") + return Dim({1}, xs[0].bd); +} + + +string SquaredEuclideanDistance::as_string(const vector& arg_names) const { + ostringstream s; + s << "|| " << arg_names[0] << " - " << arg_names[1] << " ||^2"; + return s.str(); +} + +Dim SquaredEuclideanDistance::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in SquaredEuclideanDistance") + DYNET_ARG_CHECK(xs[0].single_batch() == xs[1].single_batch() || + (LooksLikeVector(xs[0]) && LooksLikeVector(xs[1]) && xs[0].batch_size() == xs[1].batch_size()), + "Bad input dimensions in SquaredEuclideanDistance: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +string LogisticSigmoid::as_string(const vector& arg_names) const { + ostringstream s; + s << "\\sigma(" << 
arg_names[0] << ')'; + return s.str(); +} + +Dim LogisticSigmoid::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in LogisticSigmoid") + return xs[0]; +} + +string BinaryLogLoss::as_string(const vector& arg_names) const { + ostringstream os; + os << "binary_log_loss(" << arg_names[0] << ", " << arg_names[1] << ')'; + return os.str(); +} + +Dim BinaryLogLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in BinaryLogLoss") + DYNET_ARG_CHECK(xs[0].rows() == 2 || xs[0].ndims() == 1, "Bad input dimensions in BinaryLogLoss: " << xs); + DYNET_ARG_CHECK(xs[1].rows() == 2 || xs[1].ndims() == 1, "Bad input dimensions in BinaryLogLoss: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +string Zeroes::as_string(const vector& arg_names) const { + ostringstream s; + s << "zeroes(" << dim << ')'; + return s.str(); +} + +Dim Zeroes::dim_forward(const vector& xs) const { + return dim; +} + +string RandomNormal::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_normal(" << dim << ')'; + return s.str(); +} + +Dim RandomNormal::dim_forward(const vector& xs) const { + return dim; +} + +string RandomBernoulli::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_bernoulli(" << dim << ", " << p << ')'; + return s.str(); +} + +Dim RandomBernoulli::dim_forward(const vector& xs) const { + return dim; +} + +string RandomUniform::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_uniform(" << dim << ", " << left << ", " << right << ')'; + return s.str(); +} + +Dim RandomUniform::dim_forward(const vector& xs) const { + return dim; +} + +string RandomGumbel::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_gumbel(" << dim << ", " << mu << ", " << beta << ')'; + return s.str(); +} + +Dim RandomGumbel::dim_forward(const vector& xs) const { + return dim; +} + +string 
MaxDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "max_dim(" << arg_names[0] << ", reduced_dim=" << reduced_dim << ')'; + return s.str(); +} + +Dim MaxDimension::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MaxDimension"); + DYNET_ARG_CHECK(reduced_dim < xs[0].nd, + "Tried to MaxDimension on dimension " << reduced_dim << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "MaxDimension not currently supported for tensors of 4 or more dimensions."); + Dim ret(xs[0]); + ret.delete_dim(reduced_dim); + return ret; +} + +string MinDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "min_dim(" << arg_names[0] << ", reduced_dim=" << reduced_dim << ')'; + return s.str(); +} + +Dim MinDimension::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MinDimension"); + DYNET_ARG_CHECK(reduced_dim < xs[0].nd, + "Tried to MinDimension on dimension " << reduced_dim << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "MinDimension not currently supported for tensors of 4 or more dimensions."); + Dim ret(xs[0]); + ret.delete_dim(reduced_dim); + return ret; +} + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/nodes-contract.cc b/thirdparty/dynet/dynet/nodes-contract.cc new file mode 100644 index 000000000..b68a6e0f8 --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-contract.cc @@ -0,0 +1,183 @@ +#include "dynet/nodes-contract.h" + +#include +#include +#include + +#include "dynet/nodes-macros.h" + +// This file takes a long time to compile on GPU, so the define on the next line disables the contractions under CUDA. Comment that line out to enable them. 
+#define DYNET_SKIP_CUDA_CONTRACTIONS + +using namespace std; + +namespace dynet { + +#ifndef __CUDACC__ + +string InnerProduct3D_1D::as_string(const vector& arg_names) const { + ostringstream s; + s << "dot(" << arg_names[0] << "," << arg_names[1] << ')'; + if (arg_names.size() == 3) s << " + " << arg_names[2]; + return s.str(); +} + +Dim InnerProduct3D_1D::dim_forward(const vector& xs) const { + if (xs.size() != 2 && xs.size() != 3) + throw std::invalid_argument("Expected two or three arguments in InnerProduct3D_1D"); + if (xs[0].ndims() != 3 || + !LooksLikeVector(xs[1]) || + xs[0].size(2) != xs[1].size(0)) { + ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D: " << xs; + throw std::invalid_argument(s.str()); + } + Dim d({xs[0].size(0), xs[0].size(1)}, max(xs[0].bd, xs[1].bd)); + if(xs.size() == 3) d.bd = max(d.bd, xs[2].bd); + if (xs.size() == 3 && xs[2] != d) { + ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D: " << xs; + throw std::invalid_argument(s.str()); + } + return d; +} + +string InnerProduct3D_1D_1D::as_string(const vector& arg_names) const { + ostringstream s; + s << "dotdot(" << arg_names[0] << "," << arg_names[1] << "," << arg_names[2] << ')'; + if (arg_names.size() == 4) s << " + " << arg_names[3]; + return s.str(); +} + +Dim InnerProduct3D_1D_1D::dim_forward(const vector& xs) const { + if (xs.size() != 3 && xs.size() != 4) + throw std::invalid_argument("Expected three or four arguments in InnerProduct3D_1D"); + if (xs[0].ndims() != 3 || + !LooksLikeVector(xs[1]) || + !LooksLikeVector(xs[2])) { + // TODO fix add check + ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D_1D: " << xs; + throw std::invalid_argument(s.str()); + } + Dim d({xs[0].size(0)}, max(max(xs[0].bd, xs[1].bd), xs[2].bd)); + if(xs.size() == 4) d.bd = max(d.bd, xs[3].bd); + if (xs.size() == 4 && xs[3] != d) { + ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D_1D: " << xs; + throw std::invalid_argument(s.str()); + } + 
return d; +} + +#endif + +// Y_ij = A_ijk * B_k (+ C_ij) +template +void InnerProduct3D_1D::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#if defined(__CUDACC__) && defined(DYNET_SKIP_CUDA_CONTRACTIONS) + throw std::runtime_error("InnerProduct3D_1D::forward_dev_impl disabled on CUDA. Comment out DYNET_SKIP_CUDA_CONTRACTIONS in nodes-contract.cc to enable this function."); +#else + auto A = xs[0]->t<3>(); + auto b = xs[1]->t<1>(); + typedef Eigen::Tensor::DimensionPair DimPair; + Eigen::array dims({{DimPair(2, 0)}}); + if (xs.size() == 2) { + fx.t<2>().device(*dev.edevice) = A.contract(b, dims); + } else { + auto C = xs[2]->t<2>(); + fx.t<2>().device(*dev.edevice) = A.contract(b, dims) + C; + } +#endif +} + +template +void InnerProduct3D_1D::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#if defined(__CUDACC__) && defined(DYNET_SKIP_CUDA_CONTRACTIONS) + throw std::runtime_error("InnerProduct3D_1D::backward_dev_impl disabled on CUDA. 
Comment out DYNET_SKIP_CUDA_CONTRACTIONS in nodes-contract.cc to enable this function."); +#else + auto tdEdf = dEdf.t<2>(); // 2 tensor + typedef Eigen::Tensor::DimensionPair DimPair; + if (i == 0) { // 3 tensor + // tensor product + auto b = xs[1]->t<1>(); + dEdxi.t<3>().device(*dev.edevice) += tdEdf.contract(b, Eigen::array{{}}); + } else if (i == 1) { + auto A = xs[0]->t<3>(); // A is 3 tensor + Eigen::array dims({{DimPair(0, 0), DimPair(1, 1)}}); + dEdxi.t<1>().device(*dev.edevice) += tdEdf.contract(A, dims); + } else if (i == 2) { + dEdxi.t<2>().device(*dev.edevice) += tdEdf; + } else { + throw std::runtime_error("Illegal configuration in InnerProduct3D"); + } +#endif +} +DYNET_NODE_INST_DEV_IMPL(InnerProduct3D_1D) + +// Y_i = A_ijk * B_k * C_j (+ D_i) +template +void InnerProduct3D_1D_1D::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#if defined(__CUDACC__) && defined(DYNET_SKIP_CUDA_CONTRACTIONS) + throw std::runtime_error("InnerProduct3D_1D_1D::forward_dev_impl disabled on CUDA. Comment out DYNET_SKIP_CUDA_CONTRACTIONS in nodes-contract.cc to enable this function."); +#else + auto A = xs[0]->t<3>(); + auto b = xs[1]->t<1>(); + auto c = xs[2]->t<1>(); + typedef Eigen::Tensor::DimensionPair DimPair; + Eigen::array dims({{DimPair(2, 0)}}); + Eigen::array dims2({{DimPair(1, 0)}}); + if (xs.size() == 3) { + fx.t<1>().device(*dev.edevice) = A.contract(b, dims).contract(c, dims2); + } else { + auto d = xs[3]->t<1>(); + fx.t<1>().device(*dev.edevice) = A.contract(b, dims).contract(c, dims2) + d; + } +#endif +} + +template +void InnerProduct3D_1D_1D::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#if defined(__CUDACC__) && defined(DYNET_SKIP_CUDA_CONTRACTIONS) + throw std::runtime_error("InnerProduct3D_1D_1D::backward_dev_impl disabled on CUDA. 
Comment out DYNET_SKIP_CUDA_CONTRACTIONS in nodes-contract.cc to enable this function."); +#else + auto tdEdf = dEdf.t<1>(); // vector + typedef Eigen::Tensor::DimensionPair DimPair; + if (i == 0) { // 3 tensor + // tensor product + auto b = xs[1]->t<1>(); + auto c = xs[2]->t<1>(); + dEdxi.t<3>().device(*dev.edevice) += tdEdf.contract(c, Eigen::array{{}}).contract(b, Eigen::array{{}}); + } else if (i == 1) { // vector 1 + // TODO these should be reorganized so the contraction is first with tdEdf and then with c or b. + // in theory, that intermediate result could be cached (although DYNET doesn't support this). the fact that + // this part of the product is redone when i=1 and again when i=2 is probably why this is slower + // (or maybe it's the contract implementation?) + Eigen::array dims({{DimPair(1, 0)}}); + Eigen::array dims2({{DimPair(0, 0)}}); + auto A = xs[0]->t<3>(); + auto c = xs[2]->t<1>(); + dEdxi.t<1>().device(*dev.edevice) += A.contract(c, dims).contract(tdEdf, dims2); + } else if (i == 2) { // vector 2 + Eigen::array dims({{DimPair(2, 0)}}); + Eigen::array dims2({{DimPair(0, 0)}}); + auto A = xs[0]->t<3>(); + auto b = xs[1]->t<1>(); + dEdxi.t<1>().device(*dev.edevice) += A.contract(b, dims).contract(tdEdf, dims2); + } else if (i == 3) { // vector bias + dEdxi.t<1>().device(*dev.edevice) += tdEdf; + } else { + throw std::runtime_error("Illegal configuration in InnerProduct3D"); + } +#endif +} +DYNET_NODE_INST_DEV_IMPL(InnerProduct3D_1D_1D) + + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/nodes-contract.h b/thirdparty/dynet/dynet/nodes-contract.h new file mode 100644 index 000000000..03d09ed4c --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-contract.h @@ -0,0 +1,32 @@ +#ifndef DYNET_NODES_CONTRACT_H_ +#define DYNET_NODES_CONTRACT_H_ + +#include "dynet/dynet.h" +#include "dynet/devices.h" +#include "dynet/nodes-macros.h" + +// See nodes-macros.h for more details about DYNET_NODE_DEFINE_DEV_IMPL(). 
+ +namespace dynet { + +// Forward: +// Y_ij = A_ijk * B_k + C_ij +// +// Backward: +// (dE/dA)_ijk = (dE/dY)_ij * B_k +// (dE/dB)_k = (dE/dY)_ij * A_ijk +// (dE/dC)_ij = (dE/dY)_ij +struct InnerProduct3D_1D : public Node { + InnerProduct3D_1D(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// Y_i = A_ijk * B_k * C_j (+ D_i when the optional fourth input is given) +struct InnerProduct3D_1D_1D : public Node { + InnerProduct3D_1D_1D(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/nodes-conv.cc b/thirdparty/dynet/dynet/nodes-conv.cc new file mode 100644 index 000000000..8bec337ee --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-conv.cc @@ -0,0 +1,453 @@ +#include "dynet/nodes-conv.h" + +#include +#include +#include +#include +#include + +#include "dynet/functors.h" +#include "dynet/nodes-macros.h" +#include "third_party/eigen_spatial_convolutions.h" +#include "third_party/eigen_backward_spatial_convolutions.h" + +#if HAVE_CUDA +#include "dynet/cuda.h" +#include "dynet/gpu-ops.h" +#endif + +using namespace std; + +namespace dynet { + +#ifndef __CUDACC__ + +string AverageColumns::as_string(const vector& arg_names) const { + ostringstream s; + s << "average_cols(matrix=" << arg_names[0] << ')'; + return s.str(); +} + +Dim AverageColumns::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1 || xs.size() == 2, "Failed input count check in AverageColumns"); + int bd = (xs.size() == 1 ?
xs[0].bd : max(xs[0].bd, xs[1].bd)); + return Dim({xs[0].rows()}, bd); +} + +string FoldRows::as_string(const vector& arg_names) const { + ostringstream os; + os << "fold_rows(" << arg_names[0] << ", nrows=" << nrows << ')'; + return os.str(); +} + +Dim FoldRows::dim_forward(const vector& xs) const { + unsigned orows = xs[0].rows() / nrows; + if ((orows * nrows != xs[0].rows()) || xs.size() != 1 || xs[0].ndims() > 2) { + ostringstream s; s << "Bad input dimensions in FoldRows: " << xs; + throw std::invalid_argument(s.str()); + } + return Dim({orows, xs[0].cols()}); +} + +/* Deprecated +string Conv1DNarrow::as_string(const vector& arg_names) const { + ostringstream os; + os << "conv1d_narrow(" << arg_names[0] << ", f=" << arg_names[1] << ')'; + return os.str(); +} + +Dim Conv1DNarrow::dim_forward(const vector& xs) const { + if (xs.size() != 2) { + ostringstream s; s << "Conv1DNarrow requires two inputs: " << xs; + throw std::invalid_argument(s.str()); + } + int ocols = xs[0].cols() - xs[1].cols() + 1; + if (xs[0].ndims() != 2 || xs[1].ndims() != 2 || + xs[0].rows() != xs[1].rows() || + ocols < 1) { + ostringstream s; s << "Bad input dimensions in Conv1DNarrow: " << xs; + throw std::invalid_argument(s.str()); + } + return Dim({xs[0].rows(), (unsigned)ocols}); +} + +string Conv1DWide::as_string(const vector& arg_names) const { + ostringstream os; + os << "conv1d_wide(" << arg_names[0] << ", f=" << arg_names[1] << ')'; + return os.str(); +} + +Dim Conv1DWide::dim_forward(const vector& xs) const { + if (xs.size() != 2) { + ostringstream s; s << "Conv1DWide requires two inputs: " << xs; + throw std::invalid_argument(s.str()); + } + unsigned ocols = xs[0].cols() + xs[1].cols() - 1; + if (xs[0].ndims() != 2 || xs[1].ndims() != 2 || + xs[0].rows() != xs[1].rows()) { + ostringstream s; s << "Bad input dimensions in Conv1DWide: " << xs; + throw std::invalid_argument(s.str()); + } + return Dim({xs[0].rows(), ocols}); +} +*/ + +string Filter1DNarrow::as_string(const vector& 
arg_names) const { + ostringstream os; + os << "conv1d_narrow(" << arg_names[0] << ", f=" << arg_names[1] << ')'; + return os.str(); +} + +Dim Filter1DNarrow::dim_forward(const vector& xs) const { + if (xs.size() != 2) { + ostringstream s; s << "Filter1DNarrow requires two inputs: " << xs; + throw std::invalid_argument(s.str()); + } + int ocols = xs[0].cols() - xs[1].cols() + 1; + if (xs[0].ndims() != 2 || xs[1].ndims() < 2 || + xs[0].rows() != xs[1].rows() || + ocols < 1) { + ostringstream s; s << "Bad input dimensions in Filter1DNarrow: " << xs; + throw std::invalid_argument(s.str()); + } + const unsigned fids = (xs[1].ndims() > 2 ? xs[1][2] : 1); + return Dim({fids, (unsigned)ocols}); +} + +string KMaxPooling::as_string(const vector& arg_names) const { + ostringstream os; + os << "kmaxpool(" << arg_names[0] << ", k=" << k << ", d=" << pooled_dim << ')'; + return os.str(); +} + +Dim KMaxPooling::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(pooled_dim < xs[0].nd, + "Tried to MaxDimension on dimension " << pooled_dim << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "MaxDimension not currently supported for tensors of 4 or more dimensions."); + DYNET_ARG_CHECK(k >= 1, "Bad bad k in KMaxPooling: " << k); + DYNET_ARG_CHECK(k <= xs[0][pooled_dim], + "Bad k in KMaxPooling: k = " << k << " bigger than the size of pooled dimension " + << pooled_dim << " with size = " << xs[0][pooled_dim]); + Dim ret(xs[0]); + ret.set(pooled_dim, k); + return ret; +} + +size_t KMaxPooling::aux_storage_size() const { + // map of where the entries in f(x) go to entries in x + return sizeof(Eigen::DenseIndex) * dim.size(); +} + +string SumDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "sum_dim(matrix=" << arg_names[0] << ',' << dimension << ')'; // close with ')' to match the opening '(' + return s.str(); +} + +Dim SumDimension::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); + Dim ret(xs[0]); +
ret.delete_dim(dimension); + return ret; +} +#endif + +template +void AverageColumns::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in AverageColumns"); + unsigned cols = xs[0]->d.cols(); +#ifdef __CUDACC__ + // The reduction used on CPU is better, but not implemented in GPU + fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().chip<1>(0); + for(unsigned i = 1; i < cols; ++i) + fx.t<1>().device(*dev.edevice) += xs[0]->t<2>().chip<1>(i); + fx.t<1>().device(*dev.edevice) = fx.t<1>() / (float)cols; +#else + const Eigen::array reduction_axis = {1}; + fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().sum(reduction_axis) / (float)cols; +#endif +} + +template +void AverageColumns::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const Eigen::array broadcasts = {1, xs[0]->d[1]}; + dEdxi.t<2>().device(*dev.edevice) += (dEdf.t<2>() / (float)xs[0]->d[1]).broadcast(broadcasts); +} +DYNET_NODE_INST_DEV_IMPL(AverageColumns) + +/* Deprecated +template +void Conv1DNarrow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const unsigned ycols = dim.cols(); + const unsigned fcols = xs[1]->d.cols(); + for (unsigned j = 0; j < ycols; ++j) { + fx.t<2>().chip<1>(j).device(*dev.edevice) = xs[0]->t<2>().chip<1>(j) * xs[1]->t<2>().chip<1>(0); + for (unsigned k = 1; k < fcols; ++k) + fx.t<2>().chip<1>(j).device(*dev.edevice) += xs[0]->t<2>().chip<1>(j+k) * xs[1]->t<2>().chip<1>(k); + } + // TODO: This following version without chip is better, but for some reason dimensions don't match. 
+ // Eigen::array dims; dims[0] = 1; + // fx.t<2>().device(*dev.edevice) = xs[0]->t<2>().convolve(xs[1]->t<2>(), dims); +} + +template +void Conv1DNarrow::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed input count check in Conv1DNarrow"); + const unsigned ycols = dim.cols(); + const unsigned fcols = xs[1]->d.cols(); + // TODO: Can this be done with a kernel and without using chip? + if (i == 0) { // derivative wrt input x + for (unsigned j = 0; j < ycols; ++j) + for (unsigned k = 0; k < fcols; ++k) + dEdxi.t<2>().chip<1>(j+k).device(*dev.edevice) += xs[1]->t<2>().chip<1>(k) * dEdf.t<2>().chip<1>(j); + } else { // derivative wrt filter f + for (unsigned j = 0; j < ycols; ++j) + for (unsigned k = 0; k < fcols; ++k) + dEdxi.t<2>().chip<1>(k).device(*dev.edevice) += xs[0]->t<2>().chip<1>(j+k) * dEdf.t<2>().chip<1>(j); + } +} +DYNET_NODE_INST_DEV_IMPL(Conv1DNarrow) + +template +void Conv1DWide::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + TensorTools::zero(fx); + const unsigned xcols = xs[0]->d.cols(); + const unsigned fcols = xs[1]->d.cols(); + for (unsigned j = 0; j < xcols; ++j) + for (unsigned k = 0; k < fcols; ++k) + fx.t<2>().chip<1>(j+k).device(*dev.edevice) += xs[1]->t<2>().chip<1>(k) * xs[0]->t<2>().chip<1>(j); +} + + +template +void Conv1DWide::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const unsigned xcols = xs[0]->d.cols(); + const unsigned fcols = xs[1]->d.cols(); + if (i == 0) { // derivative wrt input x + for (unsigned j = 0; j < xcols; ++j) + for (unsigned k = 0; k < fcols; ++k) + dEdxi.t<2>().chip<1>(j).device(*dev.edevice) += xs[1]->t<2>().chip<1>(k) * dEdf.t<2>().chip<1>(j + k); + } else { // derivative wrt filter f + for (unsigned j = 0; j < xcols; ++j) + for (unsigned k = 0; k < fcols; ++k) + 
dEdxi.t<2>().chip<1>(k).device(*dev.edevice) += xs[0]->t<2>().chip<1>(j) * dEdf.t<2>().chip<1>(j + k); + } +} +DYNET_NODE_INST_DEV_IMPL(Conv1DWide) +*/ + +template +void Filter1DNarrow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const Eigen::array dims = {0, 1}; + if(xs[1]->d.ndims() == 2) { + fx.t<2>().device(*dev.edevice) = xs[0]->t<2>().convolve(xs[1]->t<2>(), dims); + } else { + DYNET_ASSERT(xs[1]->d.ndims() > 2, "Input to Filter1DNarrow must have 2 or more dimensions"); + const unsigned fids = xs[1]->d[2]; + const unsigned ycols = dim.cols(); + Eigen::DSizes indices(0,0); + Eigen::DSizes sizes(1,ycols); + for(unsigned fid = 0; fid < fids; ++fid) { + indices[0] = fid; +#if defined(__CUDACC__) && defined(EIGEN_NO_MALLOC) + throw std::runtime_error("CUDA memory allocation in Filter1DNarrow"); +#endif + fx.t<2>().slice(indices, sizes).device(*dev.edevice) = xs[0]->t<2>().convolve(xs[1]->t<3>().chip<2>(fid), dims); + } + } +} + +template +void Filter1DNarrow::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed input count check in Filter1DNarrow"); + const unsigned rows = xs[1]->d.rows(); + const unsigned ycols = dim.cols(); + const unsigned fcols = xs[1]->d.cols(); + const unsigned fids = (xs[1]->d.ndims() > 2 ? xs[1]->d[2] : 1); + Eigen::DSizes sizes(rows,fcols); + Eigen::DSizes indices(0,0); + // TODO: This implementation is by no means optimized. Is there a better way to do it? 
+ vector dEdf_vec = as_vector(dEdf); + if(i == 0) { + for(unsigned i = 0; i < ycols; i++) { + indices[1] = i; + if(fids == 1) { + dEdxi.t<2>().slice(indices, sizes).device(*dev.edevice) += xs[1]->t<2>() * dEdf_vec[i]; + } else { + for(unsigned fid = 0; fid < fids; fid++) + dEdxi.t<2>().slice(indices, sizes).device(*dev.edevice) += xs[1]->t<3>().chip<2>(fid) * dEdf_vec[fid + i * fids]; + } + } + } else { + for(unsigned i = 0; i < ycols; i++) { + indices[1] = i; + if(fids == 1) { + dEdxi.t<2>().device(*dev.edevice) += xs[0]->t<2>().slice(indices, sizes) * dEdf_vec[i]; + } else { + for(unsigned fid = 0; fid < fids; fid++) + dEdxi.t<3>().chip<2>(fid).device(*dev.edevice) += xs[0]->t<2>().slice(indices, sizes) * dEdf_vec[fid + i * fids]; + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(Filter1DNarrow) + + +template +void FoldRows::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + unsigned orows = fx.d.rows(); + for (unsigned i = 0; i < orows; ++i) { + fx.tb<2>().chip<0>(i).device(*dev.edevice) = xs[0]->tb<2>().chip<0>(i * nrows); + for (unsigned j = 1; j < nrows; ++j) + fx.tb<2>().chip<0>(i).device(*dev.edevice) += xs[0]->tb<2>().chip<0>(i * nrows + j); + } + // TODO: This broadcasting should work? 
+ // array broadcasts; broadcasts[0] = nrows; + // fx.tvec().broadcast(broadcasts).device(*dev.edevice) += xs[0]->tvec(); +} + +template +void FoldRows::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const Eigen::array broadcasts = {nrows}; + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec().broadcast(broadcasts); + // unsigned orows = fx.d.rows(); + // for (unsigned i = 0; i < orows; ++i) + // for (unsigned j = 0; j < nrows; ++j) + // dEdxi.tb<2>().chip<0>(i * nrows + j) += d.tb<2>().chip<0>(i); +} +DYNET_NODE_INST_DEV_IMPL(FoldRows) + +template +void KMaxPooling::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + // TODO: The code that works on CPU does not compile on CUDA + throw std::runtime_error("KMaxPooling::forward_dev_impl not working on CUDA yet"); +#endif + Eigen::DenseIndex* maxmap = static_cast(aux_mem); + Eigen::TensorMap> locs(maxmap, dim[0], dim[1], dim[2], dim.batch_elems()); + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[first_dim]; + const unsigned second_dim_size = dim[second_dim]; + Eigen::Tensor tmp(xs[0]->d[pooled_dim]); + for (unsigned b = 0; b < batch_size; ++b){ + for (unsigned j = 0; j < second_dim_size; ++j){ + for (unsigned i = 0; i < first_dim_size; ++i){ + // get nth element + tmp.device(*dev.edevice) = xs[0]->tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim); + nth_element(tmp.data(), tmp.data()+(k-1), tmp.data()+tmp.size(), std::greater()); + const float c = tmp.data()[k-1]; + // calculate fx and indices + tmp.device(*dev.edevice) = xs[0]->tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim); + unsigned tt = 0; + for (unsigned l = 0; l < tmp.size(); ++l) { + const float tensor_val = tmp.data()[l]; + if (tensor_val >= c) { + if (pooled_dim > second_dim){ + fx.tb<3>().chip<3>(b).chip(tt, pooled_dim).chip(j, second_dim).chip(i, 
first_dim).device(*dev.edevice) = tmp.chip<0>(l); + locs(i, j, tt, b) = l; + } + else if (pooled_dim > first_dim){ + fx.tb<3>().chip<3>(b).chip(j, second_dim).chip(tt, pooled_dim).chip(i, first_dim).device(*dev.edevice) = tmp.chip<0>(l); + locs(i, tt, j, b) = l; + } + else { + fx.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(tt, pooled_dim).device(*dev.edevice) = tmp.chip<0>(l); + locs(tt, i, j, b) = l; + } + ++tt; + if (tt == k) break; // could happen in case of ties + } + } + } + } + } +} + +template +void KMaxPooling::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in KMaxPooling::backward"); +#ifdef __CUDACC__ + vector indices(dim.size()); + Eigen::DenseIndex* maxmap = &indices[0]; + CUDA_CHECK(cudaMemcpy((void*)maxmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); +#else + Eigen::DenseIndex* maxmap = static_cast(aux_mem); +#endif + Eigen::TensorMap> locs(maxmap, dim[0], dim[1], dim[2], dim.batch_elems()); + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[first_dim]; + const unsigned second_dim_size = dim[second_dim]; + const unsigned pooled_dim_size = dim[pooled_dim]; + for(unsigned b = 0; b < batch_size; ++b){ + for(unsigned j = 0; j < second_dim_size; ++j){ + for(unsigned i = 0; i < first_dim_size; ++i){ + for(unsigned l = 0; l < pooled_dim_size; ++l){ + if (pooled_dim > second_dim) + dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, l, b), pooled_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<3>().chip<3>(b).chip<2>(l).chip<1>(j).chip<0>(i); + else if (pooled_dim > first_dim) + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, l, j, b), pooled_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<3>().chip<3>(b).chip<2>(j).chip<1>(l).chip<0>(i); + else + dEdxi.tb<3>().chip<3>(b).chip(j, 
second_dim).chip(i, first_dim).chip(locs(l, i, j, b), pooled_dim).device(*dev.edevice) + += dEdf.tb<3>().chip<3>(b).chip<2>(j).chip<1>(i).chip<0>(l); + } + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(KMaxPooling) + +template +void SumDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); + Eigen::array reduction_axis = {(int)dimension}; + fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().sum(reduction_axis); +} + +template +void SumDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // TODO: limit to 3-dimensional tensor is arbitrary + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = dEdxi.d[dimension]; + Eigen::array morph = {(int)dEdxi.d[0],(int)dEdxi.d[1],(int)dEdxi.d[2],(int)dEdxi.d.bd}; morph[dimension] = 1; + dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<3>().reshape(morph).broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(SumDimension) + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/nodes-conv.h b/thirdparty/dynet/dynet/nodes-conv.h new file mode 100644 index 000000000..e47a0aafa --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-conv.h @@ -0,0 +1,100 @@ +#ifndef DYNET_NODES_CONV_H_ +#define DYNET_NODES_CONV_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" +#include "dynet/op-helper.h" + +#if HAVE_CUDNN +#include "dynet/cudnn-ops.h" +#endif + +namespace dynet { + +// with a single argument x \in R^{n x m} +// y_i = \sum_j x_i,j / m +struct AverageColumns : public Node { + template explicit AverageColumns(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +/* Deprecated +// y = x_1 *conv x_2 +// x_1 \in R^{d x s} (input) +// x_2 \in R^{d x m} (filter) +struct Conv1DNarrow : public Node { + explicit Conv1DNarrow(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 *conv x_2 +// x_1 \in 
R^{d x s} (input) +// x_2 \in R^{d x m} (filter) +struct Conv1DWide : public Node { + explicit Conv1DWide(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; +*/ + +// y = x_1 *filter x_2 +// x_1 \in R^{d x s} (input) +// x_2 \in R^{d x m} (filter) +struct Filter1DNarrow : public Node { + explicit Filter1DNarrow(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +struct FoldRows : public Node { + explicit FoldRows(const std::initializer_list& a, unsigned nrows) : Node(a), nrows(nrows) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned nrows; +}; + +struct KMaxPooling : public Node { + explicit KMaxPooling(const std::initializer_list& a, unsigned k = 1, unsigned dimension = 1) : Node(a), k(k), pooled_dim(dimension) { + first_dim = pooled_dim == 0 ? 1 : 0; + second_dim = first_dim + 1 == pooled_dim ? first_dim + 2 : first_dim + 1; + } + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned k; + unsigned pooled_dim; + unsigned first_dim; + unsigned second_dim; +}; + +// sum along a single dimension +struct SumDimension : public Node { + template explicit SumDimension(const T& a, unsigned d) : Node(a), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned dimension; +}; + +// conv2d +// y = x_1 *conv2d x_2 +// x_1 \in R^{H x W x Ci x N} (input) +// x_2 \in R^{H x W x Ci x Co} (filter) +// stride[0] corresponds to H +// stride[1] corresponds to W +// is_valid: true for 'VALID' and false for 'SAME' +struct Conv2D: public Node { + explicit Conv2D(const std::initializer_list& a, const std::vector& s, + const bool padding_type = true) + : Node(a), stride(s), is_valid(padding_type) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + const std::vector stride; + const bool is_valid; + + private: +#if HAVE_CUDNN + mutable 
CudnnConvOp* cudnn_conv_op_ = NULL; +#endif +}; + + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/nodes-conv2d.cc b/thirdparty/dynet/dynet/nodes-conv2d.cc new file mode 100644 index 000000000..a9b6f039c --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-conv2d.cc @@ -0,0 +1,190 @@ +#include "dynet/nodes-conv.h" + +#include +#include +#include +#include +#include + +#include "dynet/functors.h" +#include "dynet/nodes-macros.h" +#include "third_party/eigen_spatial_convolutions.h" +#include "third_party/eigen_backward_spatial_convolutions.h" + +#if HAVE_CUDA +#include "dynet/cuda.h" +#include "dynet/gpu-ops.h" +#endif + +using namespace std; + +namespace dynet { + +#ifndef __CUDACC__ + +string Conv2D::as_string(const vector& arg_names) const { + ostringstream s; + s << "conv2d(" << arg_names[0] << ", f=" << arg_names[1]; + if (arg_names.size() == 3) + s << ", b=" << arg_names[2]; + s << ")"; + return s.str(); +} + +Dim Conv2D::dim_forward(const vector& xs) const { + if (xs.size() != 2 && xs.size() != 3) { + ostringstream s; s << "Conv2D requires either two or three inputs: " << xs; + throw std::invalid_argument(s.str()); + } + if (xs[0].ndims() != 3 || xs[1].ndims() != 4 || + xs[1].d[2] != xs[0].d[2]) { + ostringstream s; s << "Bad input dimensions in Conv2D: " << xs; + throw std::invalid_argument(s.str()); + } + if (is_valid && (xs[0].d[0] < xs[1].d[0] || xs[0].d[1] < xs[1].d[1])) { + ostringstream s; s << "Bad input dimensions in Conv2D: in VALID convolution, the filter size must not be greater than the feature map size" << xs; + throw std::invalid_argument(s.str()); + } + if (xs.size() == 3) { //has bias term + if (xs[2].d[0] != xs[1].d[3] || xs[2].ndims() != 1) { + ostringstream s; s << "Bad input dimensions in Conv2D: " << xs; + throw std::invalid_argument(s.str()); + } + } + unsigned bs = xs[0].batch_elems(); + std::vector output_shape(3); + output_shape[2] = static_cast(xs[1].d[3]); + for (unsigned i = 0; i < 2; ++i) { + float input_dim = 
static_cast(xs[0].d[i]); + float kernel_dim = static_cast(xs[1].d[i]); + float s = static_cast(stride[i]); + if (is_valid) { + output_shape[i] = static_cast(ceil((input_dim - kernel_dim + 1) / s)); + } else { + output_shape[i] = static_cast(ceil(input_dim / s)); + } + } + return Dim(output_shape, bs); +} + +size_t Conv2D::aux_storage_size() const { + vector input_size(arity()); + for (unsigned i = 0; i < arity(); ++i) { + input_size[i] = get_cg()->nodes[args[i]]->dim.size(); + } + size_t nbytes = 0; +#if HAVE_CUDNN + nbytes += CudnnConvOp::workspace_size_limit_bytes; + nbytes += 3 * input_size[0] * sizeof(float); +#else + nbytes += sizeof(float) * (input_size[0] + input_size[1] + + dim.size() + std::max(input_size[0], input_size[1])); +#endif + return nbytes; +} +#endif + +template +void Conv2D::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2 || xs.size() == 3, "Failed dimension check in Conv2D::forward, at least 2 inputs"); + DYNET_ASSERT(fx.d.bd == xs[0]->d.bd, "Failed dimension check in Conv2D::forward, batchsize not match"); + DYNET_ASSERT(fx.d[2] == xs[1]->d[3], "Failed dimension check in Conv2D::forward, #channel not match"); + NodeMemPool aux_mem_pool = NodeMemPool(aux_storage_size(), aux_mem); +#ifdef __CUDACC__ +#if HAVE_CUDNN + if (cudnn_conv_op_ == NULL) { + cudnn_conv_op_ = new CudnnConvOp(stride, is_valid); + } + cudnn_conv_op_->set_pool(&aux_mem_pool); // aux_mem_pool is a local of this call: rebind on every call, not only when the op is first created + cudnn_conv_op_->forward_impl(dev, xs, fx); +#else + throw std::runtime_error("Conv2D::forward_dev_impl not supported without CUDNN"); +#endif +#else + Eigen::PaddingType padding_type = is_valid ?
Eigen::PADDING_VALID : Eigen::PADDING_SAME; + void* CHWN_x_mem = aux_mem_pool.allocate(xs[0]->d.size() * sizeof(float)); + Tensor CHWN_x = Tensor(Dim({xs[0]->d[2], xs[0]->d[0], xs[0]->d[1]}, xs[0]->d.bd), static_cast(CHWN_x_mem), xs[0]->device, DeviceMempool::FXS); + Eigen::array shuffles; + shuffles[0] = 2; shuffles[1] = 0; shuffles[2] = 1; shuffles[3] = 3; + CHWN_x.tb<3>().device(*dev.edevice) = xs[0]->tb<3>().shuffle(shuffles); + void* NCHW_f_mem = aux_mem_pool.allocate(xs[1]->d.size() * sizeof(float)); + Tensor NCHW_f = Tensor(Dim({xs[1]->d[3], xs[1]->d[2], xs[1]->d[0], xs[1]->d[1]}), static_cast(NCHW_f_mem), xs[1]->device, DeviceMempool::FXS); + shuffles[0] = 3; shuffles[1] = 2; shuffles[2] = 0; shuffles[3] = 1; + NCHW_f.t<4>().device(*dev.edevice) = xs[1]->t<4>().shuffle(shuffles); + void* CHWN_y_mem = aux_mem_pool.allocate(fx.d.size() * sizeof(float)); + Tensor CHWN_y = Tensor(Dim({fx.d[2], fx.d[0], fx.d[1]}, fx.d.bd), static_cast(CHWN_y_mem), fx.device, DeviceMempool::FXS); + CHWN_y.tb<3>().device(*dev.edevice) = Eigen::SpatialConvolution(CHWN_x.tb<3>(), NCHW_f.t<4>(), stride[0], stride[1], padding_type); + shuffles[0] = 1; shuffles[1] = 2; shuffles[2] = 0; shuffles[3] = 3; + fx.tb<3>().device(*dev.edevice) = CHWN_y.tb<3>().shuffle(shuffles); + //NWHCToNCWH()(&NWHC_y, fx); + if (xs.size() == 3) { + Tensor bias = Tensor(Dim({fx.d[0], fx.d[1], fx.d.bd}, 1), static_cast(CHWN_x_mem), xs[2]->device, DeviceMempool::FXS); + for (unsigned i = 0; i < fx.d[2]; ++i) { + TensorTools::constant(bias, xs[2]->vec()(i)); + fx.tb<3>().chip<2>(i).device(*dev.edevice) += bias.t<3>(); + } + } +#endif +} + +template +void Conv2D::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // don't check those already checked in forward_impl + DYNET_ASSERT(dEdf.d == fx.d, "Failed dimension check in Conv2D::backward"); + DYNET_ASSERT(dEdxi.d == xs[i]->d, "Failed dimension check in 
Conv2D::backward"); + DYNET_ASSERT(i <= 2, "Failed dimension check in Conv2D::backward"); + NodeMemPool aux_mem_pool = NodeMemPool(aux_storage_size(), aux_mem); +#ifdef __CUDACC__ +#if HAVE_CUDNN + DYNET_ASSERT(cudnn_conv_op_ != NULL, "cudnn operator is not initialized"); + cudnn_conv_op_->set_pool(&aux_mem_pool); + cudnn_conv_op_->backward_impl(dev, xs, fx, dEdf, i, dEdxi); +#else + throw std::runtime_error("Conv2D::backward_dev_impl not supported without CUDNN"); +#endif +#else + void* CHWN_dy_mem = aux_mem_pool.allocate(dEdf.d.size() * sizeof(float)); + Tensor CHWN_dy = Tensor(Dim({dEdf.d[2], dEdf.d[0], dEdf.d[1]}, dEdf.d.bd), static_cast(CHWN_dy_mem), dEdf.device, DeviceMempool::FXS); + Eigen::array shuffles; + shuffles[0] = 2; shuffles[1] = 0; shuffles[2] = 1; shuffles[3] = 3; + CHWN_dy.tb<3>().device(*dev.edevice) = dEdf.tb<3>().shuffle(shuffles); + if (i == 0) { //backward w.r.t the input + void* NCHW_f_mem = aux_mem_pool.allocate(xs[1]->d.size() * sizeof(float)); + Tensor NCHW_f = Tensor(Dim({xs[1]->d[3], xs[1]->d[2], xs[1]->d[0], xs[1]->d[1]}), static_cast(NCHW_f_mem), xs[1]->device, DeviceMempool::FXS); + shuffles[0] = 3; shuffles[1] = 2; shuffles[2] = 0; shuffles[3] = 1; + NCHW_f.t<4>().device(*dev.edevice) = xs[1]->t<4>().shuffle(shuffles); + void* CHWN_dEdxi_mem = aux_mem_pool.allocate(xs[0]->d.size() * sizeof(float)); + Tensor CHWN_dEdxi = Tensor(Dim({xs[0]->d[2], xs[0]->d[0], xs[0]->d[1]}, xs[0]->d.bd), static_cast(CHWN_dEdxi_mem), dEdxi.device, DeviceMempool::FXS); + CHWN_dEdxi.tb<3>().device(*dev.edevice) = Eigen::SpatialConvolutionBackwardInput(NCHW_f.t<4>(), CHWN_dy.tb<3>(), xs[0]->d[0], xs[0]->d[1], stride[0], stride[1]); + void* HWCN_dEdxi_mem = aux_mem_pool.allocate(xs[0]->d.size() * sizeof(float)); + Tensor HWCN_dEdxi = Tensor(xs[0]->d, static_cast(HWCN_dEdxi_mem), dEdxi.device, DeviceMempool::FXS); + shuffles[0] = 1; shuffles[1] = 2; shuffles[2] = 0; shuffles[3] = 3; + HWCN_dEdxi.tb<3>().device(*dev.edevice) = 
CHWN_dEdxi.tb<3>().shuffle(shuffles); + dEdxi.tb<3>().device(*dev.edevice) += HWCN_dEdxi.tb<3>(); + } else if (i == 1) { //backward w.r.t the kernel + void* CHWN_x_mem = aux_mem_pool.allocate(xs[0]->d.size() * sizeof(float)); + Tensor CHWN_x = Tensor(Dim({xs[0]->d[2], xs[0]->d[0], xs[0]->d[1]}, xs[0]->d.bd), static_cast(CHWN_x_mem), xs[0]->device, DeviceMempool::FXS); + shuffles[0] = 2; shuffles[1] = 0; shuffles[2] = 1; shuffles[3] = 3; + CHWN_x.tb<3>().device(*dev.edevice) = xs[0]->tb<3>().shuffle(shuffles); + void* NCHW_dEdxi_mem = aux_mem_pool.allocate(xs[1]->d.size() * sizeof(float)); + Tensor NCHW_dEdxi = Tensor(Dim({xs[1]->d[3], xs[1]->d[2], xs[1]->d[0], xs[1]->d[1]}), static_cast(NCHW_dEdxi_mem), dEdxi.device, DeviceMempool::FXS); + NCHW_dEdxi.t<4>().device(*dev.edevice) = Eigen::SpatialConvolutionBackwardKernel(CHWN_x.tb<3>(), CHWN_dy.tb<3>(), xs[1]->d[0], xs[1]->d[1], stride[0], stride[1]); + void* HWCN_dEdxi_mem = aux_mem_pool.allocate(xs[1]->d.size() * sizeof(float)); + Tensor HWCN_dEdxi = Tensor(xs[1]->d, static_cast(HWCN_dEdxi_mem), dEdxi.device, DeviceMempool::FXS); + shuffles[0] = 2; shuffles[1] = 3; shuffles[2] = 1; shuffles[3] = 0; + HWCN_dEdxi.t<4>().device(*dev.edevice) = NCHW_dEdxi.t<4>().shuffle(shuffles); + dEdxi.t<4>().device(*dev.edevice) += HWCN_dEdxi.t<4>(); + } else { //backward w.r.t the bias + Eigen::array red_axis = {0, 1, 3}; + dEdxi.t<1>().device(*dev.edevice) += dEdf.tb<3>().sum(red_axis); + } +#endif +} +DYNET_NODE_INST_DEV_IMPL(Conv2D) + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/nodes-macros.h b/thirdparty/dynet/dynet/nodes-macros.h new file mode 100644 index 000000000..ba084e31f --- /dev/null +++ b/thirdparty/dynet/dynet/nodes-macros.h @@ -0,0 +1,114 @@ +#ifndef DYNET_NODE_MACROS_H_ +#define DYNET_NODE_MACROS_H_ + +#include "dynet/dim.h" + +namespace dynet { + +inline bool LooksLikeVector(const Dim& d) { + if (d.ndims() == 1) return true; + if (d.ndims() > 1) { + for (unsigned i = 1; i < d.ndims(); ++i) + if (d[i] 
!= 1) return false; + } + return true; +} + +} + +// A macro to dispatch things to the appropriate device +#define DYNET_NODE_DEFINE_DEV_IMPL() \ + std::string as_string(const std::vector& arg_names) const override; \ + Dim dim_forward(const std::vector& xs) const override; \ + void forward_impl(const std::vector& xs, Tensor& fx) const override; \ + template \ + void forward_dev_impl(const MyDevice & dev, const std::vector& xs, Tensor& fx) const; \ + void backward_impl(const std::vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const override; \ + template \ + void backward_dev_impl( \ + const MyDevice & dev, \ + const std::vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const; + +// A macro to instantiate templated device functions +// If the implementation is the same for both devices (using Eigen Tensors), +// then this will instantiate both CPU and GPU implementations, and the +// code can be the same. 
+// If the implementation is different for both devices, use #ifdef __CUDACC__ +// within the function, and create alternative code paths for CPU and GPU implementations +#ifdef __CUDACC__ +#define DYNET_NODE_INST_DEV_IMPL(MyNode) \ + template void MyNode::forward_dev_impl(const Device_GPU & dev, const vector& xs, Tensor& fx) const; \ + template void MyNode::backward_dev_impl(const Device_GPU & dev, \ + const vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const; +#elif HAVE_CUDA +// When we have CUDA, make sure we extern the GPU code to make sure that it is only +// generated by CUDACC +#define DYNET_NODE_INST_DEV_IMPL(MyNode) \ + extern template void MyNode::forward_dev_impl(const Device_GPU & dev, const vector& xs, Tensor& fx) const; \ + extern template void MyNode::backward_dev_impl(const Device_GPU & dev, \ + const vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const; \ + template void MyNode::forward_dev_impl(const Device_CPU & dev, const vector& xs, Tensor& fx) const; \ + template void MyNode::backward_dev_impl(const Device_CPU & dev, \ + const vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const; \ + void MyNode::forward_impl(const std::vector& xs, Tensor& fx) const { \ + DYNET_ASSERT(fx.device, "Device not allocated for expression"); \ + if(fx.device->type == DeviceType::CPU) { forward_dev_impl(*(dynet::Device_CPU*)fx.device,xs,fx); } \ + else if(fx.device->type == DeviceType::GPU) { forward_dev_impl(*(dynet::Device_GPU*)fx.device,xs,fx); } \ + else { throw std::runtime_error("Invalid device in MyNode::forward_impl"); } \ + } \ + void MyNode::backward_impl(const std::vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const { \ + DYNET_ASSERT(fx.device, "Device not allocated for expression"); \ + if(fx.device->type == DeviceType::CPU) { 
backward_dev_impl(*(dynet::Device_CPU*)fx.device,xs,fx,dEdf,i,dEdxi); } \ + else if(fx.device->type == DeviceType::GPU) { backward_dev_impl(*(dynet::Device_GPU*)fx.device,xs,fx,dEdf,i,dEdxi); } \ + else { throw std::runtime_error("Invalid device in MyNode::backward_impl"); } \ + } +#else +#define DYNET_NODE_INST_DEV_IMPL(MyNode) \ + template void MyNode::forward_dev_impl(const Device_CPU & dev, const vector& xs, Tensor& fx) const; \ + template void MyNode::backward_dev_impl(const Device_CPU & dev, \ + const vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const; \ + void MyNode::forward_impl(const std::vector& xs, Tensor& fx) const { \ + DYNET_ASSERT(fx.device, "Device not allocated for expression"); \ + if(fx.device->type == DeviceType::CPU) { forward_dev_impl(*(dynet::Device_CPU*)fx.device,xs,fx); } \ + else { throw std::runtime_error("Invalid device in MyNode::forward_impl"); } \ + } \ + void MyNode::backward_impl(const std::vector& xs, \ + const Tensor& fx, \ + const Tensor& dEdf, \ + unsigned i, \ + Tensor& dEdxi) const { \ + DYNET_ASSERT(fx.device, "Device not allocated for expression"); \ + if(fx.device->type == DeviceType::CPU) { backward_dev_impl(*(dynet::Device_CPU*)fx.device,xs,fx,dEdf,i,dEdxi); } \ + else { throw std::runtime_error("Invalid device in MyNode::backward_impl"); } \ + } +#endif + +#endif diff --git a/thirdparty/dynet/dynet/nodes.cc b/thirdparty/dynet/dynet/nodes.cc new file mode 100644 index 000000000..dd0bfd2f1 --- /dev/null +++ b/thirdparty/dynet/dynet/nodes.cc @@ -0,0 +1,2826 @@ +#include "dynet/nodes.h" + +#include +#include +#include + +#include "dynet/simd-functors.h" +#include "dynet/functors.h" +#include "dynet/nodes-macros.h" +#include "dynet/globals.h" + +#ifdef __CUDACC__ +#include "dynet/cuda.h" +#include "dynet/gpu-ops.h" +#endif + +using namespace std; + +inline string print_vec(const std::vector & vec) { + string sep = "["; + ostringstream oss; + for(auto f : vec) { + oss << sep << 
f; sep = ","; + } + oss << "]"; + return oss.str(); +} + +// notes on implementing differentiable components +// 1) fx can be understood as a pointer to the (preallocated) location for the result +// of forward to be stored +// 2) fx is not initialized, so after calling forward fx must point to the correct answer +// 3) fx can be repointed to an input, if forward(x) evaluates to x (e.g., in reshaping) +// 4) dEdxi MUST **ACCUMULATE** a result since multiple calls to forward may depend on +// the same x_i. Even, e.g., Identity must be implemented as +// dEdx1 += dEdf. THIS IS EXTREMELY IMPORTANT +// 5) scalar results of forward are placed in fx.v[0] +// 6) DYNET manages its own memory, not Eigen, and it is configured with the +// EIGEN_NO_MALLOC option. If you get an error about Eigen attempting to allocate +// memory, it is (probably) because of an implicit creation of a temporary variable. +// To tell Eigen this is not necessary, the noalias() method is available. If you really +// do need a temporary variable, its capacity must be requested by Node::aux_storage_size +// +// notes on debugging problems with differentiable components +// 1) fx is uninitialized when forward is called - are you relying on it being 0? +// 2) dEdxi must accumulate (see point 4 above!) 
+// + +namespace dynet { + +// ======= Shared definitions +#define MAX_LOG_SUM_EXP 65536 +#define MAX_SPARSEMAX_LOSS_ROWS 65536 + +// ======= Functions to be compiled on only CPU +#ifndef __CUDACC__ + +// set use_cholesky if M is symmetric - it's faster and more stable +// for dep paring it won't be +template +inline typename MatrixType::Scalar logdet(const MatrixType& M, bool use_cholesky = false) { + using namespace Eigen; + using std::log; + typedef typename MatrixType::Scalar Scalar; + Scalar ld = 0; + if (use_cholesky) { + LLT> chol(M); + auto& U = chol.matrixL(); + for (unsigned i = 0; i < M.rows(); ++i) + ld += log(U(i,i)); + ld *= 2; + } else { + PartialPivLU> lu(M); + auto& LU = lu.matrixLU(); + Scalar c = lu.permutationP().determinant(); // -1 or 1 + for (unsigned i = 0; i < LU.rows(); ++i) { + const auto& lii = LU(i,i); + if (lii < Scalar(0)) c *= -1; + ld += log(abs(lii)); + } + ld += log(c); + } + return ld; +} + +template +EIGEN_STRONG_INLINE real logsumexp(const T& x, const vector& denom) { + real m = x(denom[0],0); + for (auto i : denom) { + real r = x(i,0); + if (r > m) m = r; + } + real z = 0; + for (auto i : denom) + z += expf(x(i,0) - m); + return m + logf(z); +} + +// ===== Auxiliary functions + +size_t BlockDropout::aux_storage_size() const { + // we just need to remember whether this entire block is turned on (1.0) or off (0.0) + return 1 * sizeof(float); +} + +size_t Dropout::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +size_t DropoutDim::aux_storage_size() const { + return (dim.size() / dim[dimension]) * sizeof(float); +} + +size_t DropoutBatch::aux_storage_size() const { + return dim.batch_elems() * sizeof(float); +} + +size_t GaussianNoise::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +size_t Hinge::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +size_t LogSoftmax::aux_storage_size() const { + return 2 * dim.size() / dim.rows() * sizeof(float); +} + +size_t 
PickNegLogSoftmax::aux_storage_size() const { + return 2 * dim.batch_elems() * sizeof(float) + dim.batch_elems() * sizeof(unsigned int); +} + +// this i need to do something better, but this is a work-around +// if this is too small, just make it bigger +size_t LogSumExp::aux_storage_size() const { + return (MAX_LOG_SUM_EXP + 1) * sizeof(float); +} + +size_t Max::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +size_t Min::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +size_t Softmax::aux_storage_size() const { + return 2 * dim.size() / dim.rows() * sizeof(float); +} + +size_t Sparsemax::aux_storage_size() const { + return (dim.size() + 1) * sizeof(float); +} + +size_t SparsemaxLoss::aux_storage_size() const { + // first dim.size dimensions is the sparsemax + const unsigned rows = MAX_SPARSEMAX_LOSS_ROWS; // this should be xs[0]->d.rows() + return rows * sizeof(float); +} + +size_t MaxDimension::aux_storage_size() const { + return sizeof(Eigen::DenseIndex) * dim.size(); +} + +size_t MinDimension::aux_storage_size() const { + return sizeof(Eigen::DenseIndex) * dim.size(); +} + +#endif // Finish CPU only functions + +// ===== Auxiliary functions for both CPU and GPU + +template +EIGEN_STRONG_INLINE void logsumexp(const MyDevice & dev, const Tensor& x, Tensor & m, Tensor& z) { + if(x.d.bd == 1 && x.d[1] == 1) { + m.t<0>().device(*dev.edevice) = x.t<1>().maximum(); +#ifdef __CUDACC__ + Eigen::array bcast; + bcast[0] = x.d[0]; + // This needs to be split into two lines to prevent memory allocation + // TODO? 
Here and in logsoftmax: Is there a better way to subtract a scalar that is already on the GPU without using broadcasting (and without copying the scalar back to the host first) + z.t<0>().device(*dev.edevice) = (x.t<1>() - m.t<1>().broadcast(bcast)).exp().sum(); + z.t<0>().device(*dev.edevice) = z.t<0>().log() + m.t<0>(); +#else + float mval = as_scalar(m); + // This needs to be split into two lines to prevent memory allocation + z.t<0>().device(*dev.edevice) = (x.t<1>() - mval).exp().sum(); + z.t<0>().device(*dev.edevice) = z.t<0>().log() + mval; +#endif + } else { + Eigen::array red_axis; red_axis[0] = 0; + m.tb<1>().device(*dev.edevice) = x.tb<2>().maximum(red_axis); + // TODO: Currently, the first version is slower on CPU, hence the switch +#ifdef __CUDACC__ + Eigen::array bcast({(int)x.d.rows(), 1, 1}); + Eigen::array morph({1, (int)m.d[0], (int)m.d.bd}); + // This needs to be split into two lines to prevent memory allocation + z.tb<1>().device(*dev.edevice) = (x.tb<2>() - m.tb<2>().reshape(morph).broadcast(bcast)).exp().sum(red_axis); + z.tb<1>().device(*dev.edevice) = z.tb<1>().log() + m.tb<1>(); +#else + auto miter = m.v; + for(size_t b = 0; b < x.d.bd; ++b) { + for(size_t i = 0; i < x.d[1]; ++i, ++miter) { + z.tb<1>().chip<1>(b).chip<0>(i).device(*dev.edevice) = (x.tb<2>().chip<2>(b).chip<1>(i) - *miter).exp().sum(); + z.tb<1>().chip<1>(b).chip<0>(i).device(*dev.edevice) = z.tb<1>().chip<1>(b).chip<0>(i).log() + *miter; + } + } +#endif + } +} + +// ===== Functions to be compiled on both CPU and GPU + +#ifdef __CUDACC__ +inline void CUDAMatrixMultiply(const Device_GPU & dev, const Tensor& l, const Tensor& r, Tensor& y, const float* acc_scalar) { + if(l.d.bd == 1 && r.d.bd == y.d.bd) { + // If the left side has one batch, multiply by columns + // [x, z, b] = [x, y] * [y, z, b] + // -> [x, z*b] = [x, y], [y, z*b] + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, + y.d.rows(), y.d.cols() * y.d.batch_elems(), l.d.cols(), + kSCALAR_ONE, + 
l.v, l.d.rows(), + r.v, r.d.rows(), + acc_scalar, y.v, y.d.rows())); + } else { + // Otherwise, loop over the batches + DYNET_ASSERT(r.d.bd != 1 || r.d.bd != l.d.bd, + "Number of batch elements in matrix multiply must match, but got: " << r.d.bd << ", " << l.d.bd); + for(unsigned b = 0; b < y.d.bd; ++b) { + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, + y.d.rows(), y.d.cols(), l.d.cols(), + kSCALAR_ONE, + l.batch_ptr(b), l.d.rows(), + r.batch_ptr(b), r.d.rows(), + acc_scalar, y.batch_ptr(b), y.d.rows())); + } + } +} +#endif + +template +void AddVectorToAllColumns::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + // TODO: Profile on CPU. Broadcasting may be slow. + if(xs[0]->d.bd >= xs[1]->d.bd) { + Eigen::array bcasts = {1, (int)xs[0]->d[1], (int)(xs[0]->d.bd/xs[1]->d.bd)}; + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>() + xs[1]->tb<2>().broadcast(bcasts); + } else { + DYNET_ASSERT(xs[0]->d.bd == 1, + "Bad dimensions in AddVectorToAllColumns::forward: " << xs[0]->d << ", " << xs[1]->d); + Eigen::array bcasts0 = {1, 1, (int)xs[1]->d.bd}; + Eigen::array bcasts1 = {1, (int)xs[0]->d[1], 1}; + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>().broadcast(bcasts0) + xs[1]->tb<2>().broadcast(bcasts1); + } +} + +template +void AddVectorToAllColumns::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in AddVetorToAllColumns::backward"); + // TODO: profile on CPU and see whether the chip version is better + if (i == 0) { // x + if(dEdf.d.bd == dEdxi.d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + Eigen::array red_axis = {2}; + dEdxi.t<2>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } + } else { // bias + if(dEdf.d.bd == dEdxi.d.bd) { + Eigen::array red_axis = {1}; + dEdxi.tb<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } else { + 
DYNET_ASSERT(dEdxi.d.bd == 1, + "Bad dimensions in AddVectorToAllColumns::backward: " << xs[0]->d << ", " << xs[1]->d); + Eigen::array red_axis = {1,2}; + dEdxi.t<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } + } +} +DYNET_NODE_INST_DEV_IMPL(AddVectorToAllColumns) + +// Affine transform uses different implementations for CPU and GPU because this is +// much faster than using Eigen's tensor contractions (as of the writing) +template +void AffineTransform::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() % 2 == 1, "Failed dimension check in AffineTransform::forward"); + if (xs.size() == 1) { + fx.v = xs[0]->v; + return; + } else { + // Add the first matrix + size_t b_size = xs[0]->d.size(), fx_size = fx.d.size(); + if(fx_size == b_size) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); + } else { +#ifdef __CUDACC__ + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d[1]/xs[0]->d[1]; bcast[2] = fx.d.bd/xs[0]->d.bd; + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>().broadcast(bcast); +#else + DYNET_ARG_CHECK(xs[0]->d.bd == 1, "In AffineTransform, broadcasting over columns with mini-batched inputs is not implemented yet"); + float *curr_ptr = fx.v, *end_ptr = curr_ptr + fx.d.size(), *in_ptr = xs[0]->v; + do { + memcpy(curr_ptr, in_ptr, sizeof(float)*b_size); + curr_ptr += b_size; + } while(curr_ptr != end_ptr); +#endif + } + + // Perform multiplication +#ifdef __CUDACC__ + for (unsigned i = 1; i < xs.size(); i += 2) + // fx = (acc_sclar)*fx + xs[0] * xs[1] + CUDAMatrixMultiply(dev, *xs[i], *xs[i + 1], fx, kSCALAR_ONE); +#else + // Multiply + for (unsigned i = 1; i < xs.size(); i += 2) { + if(xs[i]->d.bd == 1 && xs[i+1]->d.bd == fx.d.bd) { + fx.colbatch_matrix().noalias() += **xs[i] * xs[i+1]->colbatch_matrix(); + } else { + DYNET_ASSERT(xs[i+1]->d.bd == 1 || xs[i+1]->d.bd == xs[i]->d.bd, "Failed dimension check in AffineTransform::forward"); + for(unsigned b = 0; b < fx.d.bd; ++b) { + 
fx.batch_matrix(b).noalias() += xs[i]->batch_matrix(b) * xs[i+1]->batch_matrix(b); + } + } + } +#endif + } +} + +template +void AffineTransform::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < xs.size(), "Failed boundary check in AffineTransform::backward"); + // Bias term + if (i == 0) { // bias term + size_t dx_size = dEdxi.d.size(), df_size = dEdf.d.size(); + if(dx_size == df_size) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + DYNET_ARG_CHECK(dEdxi.d.bd == 1, "In AffineTransform, broadcasting over columns with mini-batched inputs is not implemented yet"); +#ifdef __CUDACC__ + if(dEdxi.d[1] == dEdf.d[1]) { + Eigen::array red_axis; red_axis[0] = 2; + dEdxi.t<2>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } else { + Eigen::array red_axis; red_axis[0] = 1; red_axis[1] = 2; + dEdxi.t<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } +#else + if(dEdxi.d[1] == dEdf.d[1]) { + for(unsigned b = 0; b < dEdf.d.bd; ++b) + (*dEdxi).noalias() += dEdf.batch_matrix(b); + } else { + Tensor mychip(dEdxi.d, dEdf.v, dEdf.device, dEdf.mem_pool); + size_t len = dEdf.d.bd * dEdf.d[1]; + for(unsigned b = 0; b < len; ++b) { + (*dEdxi).noalias() += *mychip; + mychip.v += dx_size; + } + } +#endif + } + + // Left argument of matrix multiply + } else if (i % 2 == 1) { + int max_b = max(dEdf.d.bd, xs[i+1]->d.bd); +#if __CUDACC__ + if(dEdxi.d.bd == 1 && (dEdf.d.bd == xs[i+1]->d.bd)) { + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T, + dEdxi.d.rows(), dEdxi.d.cols(), dEdf.d.cols() * dEdf.d.batch_elems(), + kSCALAR_ONE, + dEdf.v, dEdf.d.rows(), + xs[i+1]->v, xs[i+1]->d.rows(), + kSCALAR_ONE, dEdxi.v, dEdxi.d.rows())); + } else { + for(int b = 0; b < max_b; ++b) + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T, + dEdxi.d.rows(), dEdxi.d.cols(), dEdf.d.cols(), + kSCALAR_ONE, + dEdf.batch_ptr(b), 
dEdf.d.rows(), + xs[i+1]->batch_ptr(b), xs[i+1]->d.rows(), + kSCALAR_ONE, dEdxi.batch_ptr(b), dEdxi.d.rows())); + } +#else + if(dEdxi.d.bd == 1 && (dEdf.d.bd == xs[i+1]->d.bd)) { + (*dEdxi).noalias() += dEdf.colbatch_matrix() * xs[i+1]->colbatch_matrix().transpose(); + } else { + for(int b = 0; b < max_b; ++b) + dEdxi.batch_matrix(b).noalias() += dEdf.batch_matrix(b) * xs[i+1]->batch_matrix(b).transpose(); + } +#endif + } else { // right argument of matrix multiply + int max_b = max(xs[i-1]->d.bd, dEdf.d.bd); +#if __CUDACC__ + // Do a single multiply if xs[i-1] has one batch + if(xs[i-1]->d.bd == 1 && dEdxi.d.bd == dEdf.d.bd) { + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, + dEdxi.d.rows(), dEdxi.d.cols()*dEdxi.d.batch_elems(), xs[i-1]->d.rows(), + kSCALAR_ONE, + xs[i-1]->v, xs[i-1]->d.rows(), + dEdf.v, dEdf.d.rows(), + kSCALAR_ONE, dEdxi.v, dEdxi.d.rows())); + } else { + for(int b = 0; b < max_b; ++b) + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, + dEdxi.d.rows(), dEdxi.d.cols(), xs[i-1]->d.rows(), + kSCALAR_ONE, + xs[i-1]->batch_ptr(b), xs[i-1]->d.rows(), + dEdf.batch_ptr(b), dEdf.d.rows(), + kSCALAR_ONE, dEdxi.batch_ptr(b), dEdxi.d.rows())); + } +#else + if(xs[i-1]->d.bd == 1 && dEdxi.d.bd == dEdf.d.bd) { + dEdxi.colbatch_matrix().noalias() += (**xs[i-1]).transpose() * dEdf.colbatch_matrix(); + } else { + for(int b = 0; b < max_b; ++b) + dEdxi.batch_matrix(b).noalias() += xs[i-1]->batch_matrix(b).transpose() * dEdf.batch_matrix(b); + } +#endif + } +} +DYNET_NODE_INST_DEV_IMPL(AffineTransform) + +template +void Average::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const unsigned num_args = xs.size(); + if (num_args == 1) { + fx.v = xs[0]->v; + return; + } + auto res = fx.tvec(); + const unsigned remainder = num_args % 4; + switch (remainder) { + case 0: res.setZero(); break; + case 1: res.device(*dev.edevice) = xs[0]->tvec(); break; + case 2: res.device(*dev.edevice) = 
xs[0]->tvec() + xs[1]->tvec(); break; + case 3: res.device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec(); break; + } + for (unsigned i = remainder; i < num_args; i += 4) + res.device(*dev.edevice) += xs[i]->tvec() + xs[i+1]->tvec() + xs[i+2]->tvec() + xs[i+3]->tvec(); + res.device(*dev.edevice) = res / (float)num_args; +} + +template +void Average::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += (dEdf.tvec() / (float)xs.size()); +} +DYNET_NODE_INST_DEV_IMPL(Average) + +template +void Concatenate::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + unsigned curr_row = 0; + src_indices.resize(xs.size()); + Eigen::DSizes indices(0,0,0,0,0); + Eigen::DSizes sizes(fx.d[0], fx.d[1], fx.d[2], fx.d[3],static_cast(fx.d.bd)); + for (unsigned i = 0; i < xs.size(); ++i) { + indices[dimension] = src_indices[i] = curr_row; + const unsigned row_size = xs[i]->d[dimension]; + sizes[dimension] = row_size; + if(fx.d.bd == xs[i]->d.bd) { + fx.tb<4>().slice(indices, sizes).device(*dev.edevice) = xs[i]->tb<4>(); + } else { + Eigen::array bcast; bcast[0] = bcast[1] = bcast[2] = bcast[3] = 1; bcast[4] = fx.d.bd; + fx.tb<4>().slice(indices, sizes).device(*dev.edevice) = xs[i]->tb<4>().broadcast(bcast); + } + curr_row += row_size; + } +} + +template +void Concatenate::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < src_indices.size(), "Failed boundary check in Concatenate::backward: " << i << " >= " << src_indices.size()); + Eigen::DSizes indices(0,0,0,0,0); indices[dimension] = src_indices[i]; + Eigen::DSizes sizes(static_cast(dEdxi.d[0]), + static_cast(dEdxi.d[1]), + static_cast(dEdxi.d[2]), + static_cast(dEdxi.d[3]), + static_cast(fx.d.bd)); + if(dEdxi.d.bd == dEdf.d.bd) { + 
dEdxi.tb<4>().device(*dev.edevice) += dEdf.tb<4>().slice(indices, sizes); + } else { + Eigen::array red_axis; red_axis[0] = 4; + dEdxi.t<4>().device(*dev.edevice) += dEdf.tb<4>().slice(indices, sizes).sum(red_axis); + } +} +DYNET_NODE_INST_DEV_IMPL(Concatenate) + +template +void ConcatenateToBatch::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + unsigned curr_e = 0; + src_element_indices.resize(xs.size()); + Eigen::DSizes indices(0,0); + Eigen::DSizes sizes(static_cast(fx.d.batch_size()), 0); + for (unsigned i = 0; i < xs.size(); ++i) { + indices[1] = src_element_indices[i] = curr_e; + sizes[1] = xs[i]->d.bd; + fx.tbvec().slice(indices, sizes).device(*dev.edevice) = xs[i]->tbvec(); + curr_e += xs[i]->d.bd; + } + +} + +template +void ConcatenateToBatch::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < src_element_indices.size(), "Failed boundary check in ConcatenateToBatch::backward: " << i << " >= " << src_element_indices.size()); + Eigen::DSizes indices(0, static_cast(src_element_indices[i])); + Eigen::DSizes sizes(static_cast(fx.d.batch_size()), static_cast(xs[i]->d.bd)); + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().slice(indices, sizes); +} +DYNET_NODE_INST_DEV_IMPL(ConcatenateToBatch) + +template +void BinaryLogLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.t<0>().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FBinaryLogLoss()).sum(); +} + +template +void BinaryLogLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[i]->tvec().binaryExpr(xs[1-i]->tvec(), FBinaryLogLossBackward(as_scalar(dEdf))); +} +DYNET_NODE_INST_DEV_IMPL(BinaryLogLoss) + +template +void BlockDropout::forward_dev_impl(const MyDevice & dev, const vector& 
xs, Tensor& fx) const { + bernoulli_distribution distribution(1.0 - dropout_probability); + float block_multiplier = distribution(*rndeng)? 1.0 : 0.0; + block_multiplier = + dropout_probability == 1.0? 0.0 : block_multiplier / (1.0 - dropout_probability); + if (dropout_probability > 1.0 || dropout_probability < 0.0) + DYNET_INVALID_ARG("Dropout probability must be in the range [0, 1]"); + *(static_cast(aux_mem)) = block_multiplier; + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * block_multiplier; +} + +template +void BlockDropout::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + float block_multiplier = *(static_cast(aux_mem)); + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * block_multiplier; +} +DYNET_NODE_INST_DEV_IMPL(BlockDropout) + +template +void ConstantMinusX::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(const_minus_op(c)); +} + +template +void ConstantMinusX::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(ConstantMinusX) + +template +void ConstantPlusX::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(const_add_op(c)); +} + +template +void ConstantPlusX::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(ConstantPlusX) + +template +void ConstScalarMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * alpha; +} + +template +void 
ConstScalarMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in ConstScalarMultiply"); + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * alpha; +} +DYNET_NODE_INST_DEV_IMPL(ConstScalarMultiply) + +template +void Cube::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cube(); +} + +template +void Cube::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * xs[0]->tvec().square() * 3.f; +} +DYNET_NODE_INST_DEV_IMPL(Cube) + +template +void CwiseQuotient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in CwiseQuotient::forward (cdiv)"); + if(xs[0]->d.bd == xs[1]->d.bd) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec() / xs[1]->tvec(); + } else if(xs[0]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + fx.tb<1>().device(*dev.edevice) = xs[0]->tb<1>().broadcast(bcast) / xs[1]->tb<1>(); + } else { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + fx.tb<1>().device(*dev.edevice) = xs[0]->tb<1>() / xs[1]->tb<1>().broadcast(bcast); + } +} + +template +void CwiseQuotient::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in CwiseQuotient::backward (cdiv)"); + if (i == 0) { + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() / xs[1]->tvec(); + } else if(xs[1]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + dEdxi.tb<1>().device(*dev.edevice) += dEdf.tb<1>() / xs[1]->tb<1>().broadcast(bcast); + } else { + 
Eigen::array red_axis; red_axis[0] = 1; + dEdxi.t<1>().device(*dev.edevice) += (dEdf.tb<1>() / xs[1]->tb<1>()).sum(red_axis); + } + } else { // i = 1 + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec() / xs[1]->tvec().square() * xs[0]->tvec(); + } else if(xs[1]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + Eigen::array red_axis; red_axis[0] = 1; + dEdxi.t<1>().device(*dev.edevice) -= (dEdf.tb<1>() / xs[1]->tb<1>().square().broadcast(bcast) * xs[0]->tb<1>()).sum(red_axis); + } else { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + dEdxi.tb<1>().device(*dev.edevice) -= dEdf.tb<1>() / xs[1]->tb<1>().square() * xs[0]->tb<1>().broadcast(bcast); + } + } +} +DYNET_NODE_INST_DEV_IMPL(CwiseQuotient) + +template +void CwiseMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in CwiseMultiply::forward (cmult)"); + if(xs[0]->d.bd == xs[1]->d.bd) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * xs[1]->tvec(); + } else { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + if(xs[0]->d.bd == 1) + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast) * xs[1]->tbvec(); + else + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec() * xs[1]->tbvec().broadcast(bcast); + } +} + +template +void CwiseMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in CwiseMultiply::backward (cmult)"); + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * xs[1-i]->tvec(); + } else if(xs[1-i]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * xs[1-i]->tbvec().broadcast(bcast); + } else { + Eigen::array red_axis; red_axis[0] = 1; + dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() * 
xs[1-i]->tbvec()).sum(red_axis); + } +} +DYNET_NODE_INST_DEV_IMPL(CwiseMultiply) + +template +void ScalarAdd::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarAdd::forward (+)"); + Eigen::array bcast_0 = {1, (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) + xs[1]->tbvec().broadcast(bcast_1); +} + +template +void ScalarAdd::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in ScalarAdd::backward (+)"); + Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; + Eigen::array red_axes_01 = {0, 1}; + if (i == 0) { + if (xs[0]->d.bd == 1) + dEdxi.tvec().device(*dev.edevice) += dEdf.tbvec().sum(red_axis_1); + else + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec(); + } else { + if (xs[1]->d.bd == 1) + dEdxi.t<0>().device(*dev.edevice) += dEdf.tbvec().sum(red_axes_01); + else + dEdxi.tb<0>().device(*dev.edevice) += dEdf.tbvec().sum(red_axis_0); + } +} +DYNET_NODE_INST_DEV_IMPL(ScalarAdd) + +template +void ScalarMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarMultiply::forward (cmult)"); + + Eigen::array bcast_0 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {1, (int) (fx.d.bd == xs[1]->d.bd ? 
1 : fx.d.bd)}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) * xs[1]->tbvec().broadcast(bcast_1); +} + +template +void ScalarMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in ScalarMultiply::backward (cmult)"); + Eigen::array bcast_0 = {(int) fx.d.batch_size(), (int)( fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {1, (int)(fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; + Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; + Eigen::array red_axes_01 = {0, 1}; + if (i == 0) { + if (xs[0]->d.bd == 1) + dEdxi.t<0>().device(*dev.edevice) += (dEdf.tbvec() * xs[1]->tbvec().broadcast(bcast_1)).sum(red_axes_01); + else + dEdxi.tb<0>().device(*dev.edevice) += (dEdf.tbvec() * xs[1]->tbvec().broadcast(bcast_1)).sum(red_axis_0); + } else { + if (xs[1]->d.bd == 1) + dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast_0)).sum(red_axis_1); + else + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast_0); + } +} +DYNET_NODE_INST_DEV_IMPL(ScalarMultiply) + +template +void ScalarQuotient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarQuotient::forward (cdiv)"); + Eigen::array bcast_0 = {1, (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[1]->d.bd ? 
1 : fx.d.bd)}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) / xs[1]->tbvec().broadcast(bcast_1); +} + +template +void ScalarQuotient::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in ScalarQuotient::backward (cdiv)"); + Eigen::array bcast = {(int)fx.d.batch_size(), (int)(fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast2 = {1, (int)(fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; + Eigen::array red_axes_01 = {0, 1}; + if (i == 0) { + if (xs[0]->d.bd == 1) + dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() / xs[1]->tbvec().broadcast(bcast)).sum(red_axis_1); + else + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() / xs[1]->tbvec().broadcast(bcast); + } else { + if (xs[1]->d.bd == 1) + dEdxi.t<0>().device(*dev.edevice) += - (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast2)).sum(red_axes_01) / xs[1]->t<0>().square(); + else + dEdxi.tb<0>().device(*dev.edevice) += - (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast2)).sum(red_axis_0) / xs[1]->tb<0>().square(); + } +} +DYNET_NODE_INST_DEV_IMPL(ScalarQuotient) + + +template +void DotProduct::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Eigen::array red_axis; red_axis[0] = 0; + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + if(fx.d.bd == 1) { + fx.t<0>().device(*dev.edevice) = (xs[0]->tvec() * xs[1]->tvec()).sum(); + } else if(xs[0]->d.bd == xs[1]->d.bd) { + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec() * xs[1]->tbvec()).sum(red_axis); + } else if(xs[0]->d.bd == 1) { + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec().broadcast(bcast) * xs[1]->tbvec()).sum(red_axis); + } else { + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec() * xs[1]->tbvec().broadcast(bcast)).sum(red_axis); + } +} + +template +void DotProduct::backward_dev_impl(const MyDevice & dev, + 
const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if(fx.d.bd == 1) { + Eigen::array bcast; bcast[0] = xs[i]->d.batch_size(); + dEdxi.tvec().device(*dev.edevice) += xs[1-i]->tvec() * dEdf.tvec().broadcast(bcast); + } else { + Eigen::array bcast; bcast[0] =xs[i]->d.batch_size(); bcast[1] = 1; + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tbvec().device(*dev.edevice) += xs[1-i]->tbvec() * dEdf.tbvec().broadcast(bcast); + } else if(dEdxi.d.bd == 1) { + Eigen::array red_axis; red_axis[0] = 1; + dEdxi.tvec().device(*dev.edevice) += (xs[1-i]->tbvec() * dEdf.tbvec().broadcast(bcast)).sum(red_axis); + } else { + Eigen::array batchcast; batchcast[0] = 1; batchcast[1] = fx.d.bd; + dEdxi.tbvec().device(*dev.edevice) += (xs[1-i]->tbvec().broadcast(batchcast) * dEdf.tbvec().broadcast(bcast)); + } + } +} +DYNET_NODE_INST_DEV_IMPL(DotProduct) + +template +void Dropout::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * m.tvec(); +} + +template +void Dropout::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * m.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Dropout) + + +template +void DropoutBatch::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Dim mask_dim({1},xs[0]->d.batch_elems()); + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec() * m.tbvec().broadcast(bcast); +} + +template +void DropoutBatch::backward_dev_impl(const 
MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Dim mask_dim({1},xs[0]->d.batch_elems()); + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * m.tbvec().broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(DropoutBatch) + + +template +void DropoutDim::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Dim mask_dim(dim); + mask_dim.d[dimension]=1; + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); + Eigen::array bcast = {1, 1, 1, 1}; bcast[dimension] = xs[0]->d[dimension]; + fx.tb<3>().device(*dev.edevice) = xs[0]->tb<3>() * m.tb<3>().broadcast(bcast); +} + +template +void DropoutDim::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Dim mask_dim(dim); + mask_dim.d[dimension]=1; + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + Eigen::array bcast = {1, 1, 1, 1}; bcast[dimension] = dEdf.d[dimension]; + dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<3>() * m.tb<3>().broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(DropoutDim) + +template +void Erf::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().erf(); +} + +template +void Erf::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().binaryExpr(dEdf.tvec(), scalar_erf_backward_op()); +} +DYNET_NODE_INST_DEV_IMPL(Erf) + +template +void Exp::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().exp(); +} + +template +void 
Exp::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * fx.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Exp) + +template +void GaussianNoise::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_normal(m, 0, stddev); + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + m.tvec(); +} + +template +void GaussianNoise::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(GaussianNoise) + +template +void Hinge::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in Hinge::forward"); + Tensor eloss(xs[0]->d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + // TODO: Can we do this on device? 
+ if(pelement != nullptr) { + DYNET_ARG_CHECK(fx.d.bd == 1, + "Hinge was passed a single index but the corresponding expression has multiple mini-batch elements (" << fx.d.bd << ")"); + const real mlystar = margin - TensorTools::access_element(*xs[0], *pelement); + eloss.tvec().device(*dev.edevice) = (xs[0]->tvec() + mlystar).cwiseMax(0.f); + TensorTools::set_element(eloss, *pelement, 0.f); + fx.t<0>().device(*dev.edevice) = eloss.tvec().sum(); + } else { + DYNET_ASSERT(pelements != nullptr, "Hinge::forward has neither pointer to single element nor vector"); + DYNET_ARG_CHECK(xs[0]->d.bd == pelements->size(), + "The list of indexes passed to Hinge has a length (" << pelements->size() << + ") that doesn't match the number of mini-batch elements in the corresponding expression (" << xs[0]->d << ")"); + size_t batch_size = xs[0]->d.batch_size(); + for(size_t b = 0; b < fx.d.bd; b++) { + const real mlystar = margin - TensorTools::access_element(*xs[0], b*batch_size + (*pelements)[b]); + eloss.tb<1>().chip<1>(b).device(*dev.edevice) = (xs[0]->tb<1>().chip<1>(b) + mlystar).cwiseMax(0.f); + TensorTools::set_element(eloss, b*batch_size + (*pelements)[b], 0.f); + fx.tb<0>().chip<0>(b).device(*dev.edevice) = eloss.tb<1>().chip<1>(b).sum(); + } + } +} + +template +void Hinge::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in Hinge::backward"); + if(pelement != nullptr) { + if(as_scalar(fx)) { // there was some loss + const float d = as_scalar(dEdf); + Tensor eloss(xs[0]->d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + // TODO: The > comparison should not be calculated twice. Keep it in auxiliary memory? 
+ dEdxi.tvec().device(*dev.edevice) += (eloss.tvec() > 0.f).cast() * d; +#if defined(__CUDACC__) && defined(EIGEN_NO_MALLOC) + DYNET_RUNTIME_ERR("CUDA memory allocation in hinge"); +#endif + dEdxi.tvec().chip<0>(*pelement).device(*dev.edevice) -= (eloss.tvec() > 0.f).template cast().sum() * d; + } + } else { + DYNET_ASSERT(pelements != nullptr, "Hinge::backward has neither pointer to single element nor vector"); + vector fx_vec = as_vector(fx); + vector d_vec = as_vector(dEdf); + Tensor eloss(xs[0]->d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + for(size_t b = 0; b < fx.d.bd; b++) { + if(fx_vec[b]) { // there was some loss + // TODO: The > comparison should not be calculated twice. Keep it in auxiliary memory? + dEdxi.tb<1>().chip<1>(b).device(*dev.edevice) += (eloss.tb<1>().chip<1>(b) > 0.f).cast() * d_vec[b]; +#if defined(__CUDACC__) && defined(EIGEN_NO_MALLOC) + DYNET_RUNTIME_ERR("CUDA memory allocation in hinge"); +#endif + dEdxi.tb<1>().chip<1>(b).chip<0>((*pelements)[b]).device(*dev.edevice) -= (eloss.tb<1>().chip<1>(b) > 0.f).template cast().sum() * d_vec[b]; + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(Hinge) + +template +void HuberDistance::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "HuberDistance::forward dimension check failed"); + fx.t<0>().device(*dev.edevice) = (xs[0]->tvec() - xs[1]->tvec()).unaryExpr(FHuberForward(d)).sum(); +} + +template +void HuberDistance::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "HuberDistance::backward dimension check failed"); + dEdxi.tvec().device(*dev.edevice) += (xs[i]->tvec() - xs[1-i]->tvec()).unaryExpr(FHuberBackward(d, as_scalar(dEdf))); +} +DYNET_NODE_INST_DEV_IMPL(HuberDistance) + +template +void Identity::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.d = xs[0]->d; + fx.v = xs[0]->v; +} + 
+template +void Identity::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Identity) + +template +void KMHNGram::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); +#else + auto x = **xs[0]; + const int new_cols = x.cols() - n + 1; + DYNET_ASSERT(new_cols > 0, "Failed dimension check in KMHNGram"); + auto res = *fx; + res.setZero(); + for (int j = 0; j < new_cols; ++j) { + auto c_j = res.col(j); + for (unsigned k = 0; k < n; ++k) + c_j += x.col(j + k); + } +#endif +} + +template +void KMHNGram::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); +#else + const int c = dEdf.d.cols(); + for (int j = 0; j < c; ++j) + for (unsigned k = 0; k < n; ++k) + (*dEdxi).col(j+k) += (*dEdf).col(j); +#endif +} +DYNET_NODE_INST_DEV_IMPL(KMHNGram) + +template +void L1Distance::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in L1Distance::forward"); + fx.t<0>().device(*dev.edevice) = (xs[0]->tvec() - xs[1]->tvec()).abs().sum(); +} + +template +void L1Distance::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in L1Distance::backward"); + dEdxi.tvec().device(*dev.edevice) += (xs[i]->tvec() - xs[1-i]->tvec()).unaryExpr(FL1Backward(as_scalar(dEdf))); +} +DYNET_NODE_INST_DEV_IMPL(L1Distance) + +template +void Log::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = 
xs[0]->tvec().log(); +} + +template +void Log::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() / xs[0]->tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Log) + +template +void LogDet::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("LogDet not implemented for CUDA"); +#else + fx.v[0] = logdet(**xs[0], false); +#endif +} + +template +void LogDet::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); +#else + auto trans = (**xs[0]).transpose(); + (*dEdxi) += (dEdf.v[0]) * trans.inverse(); +#endif +} +DYNET_NODE_INST_DEV_IMPL(LogDet) + +template +void LogGamma::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().lgamma(); +} + +template +void LogGamma::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().digamma() * dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(LogGamma) + +template +void LogisticSigmoid::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in LogisticSigmoid::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(scalar_logistic_sigmoid_op()); +} + +template +void LogisticSigmoid::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), scalar_logistic_sigmoid_backward_op()); +} +DYNET_NODE_INST_DEV_IMPL(LogisticSigmoid) + +template +void 
LogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in LogSoftmax::forward"); + Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + Tensor m(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem + z.d.size(), fx.device, DeviceMempool::FXS); + logsumexp(dev, *xs[0], m, z); + if(fx.d.size() == fx.d.rows()) { +#ifdef __CUDACC__ + Eigen::array bcast; + bcast[0] = xs[0]->d[0]; + fx.t<1>().device(*dev.edevice) = xs[0]->t<1>() - z.t<1>().broadcast(bcast); +#else + fx.t<1>().device(*dev.edevice) = xs[0]->t<1>() - as_scalar(z); +#endif + } else { + // TODO? Is this broadcast efficient on CPU? + Eigen::array bcasts = {(int)xs[0]->d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>() - z.tvec().reshape(morph).broadcast(bcasts); + } +} + +template +void LogSoftmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + // TODO? Is this broadcast efficient on CPU? + Eigen::array red_axis; red_axis[0] = 0; + z.tb<1>().device(*dev.edevice) = dEdf.tb<2>().sum(red_axis); + Eigen::array bcast = {(int)fx.d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + dEdxi.tb<2>().device(*dev.edevice) += fx.tb<2>().exp() * -z.tvec().reshape(morph).broadcast(bcast) + dEdf.tb<2>(); +} +DYNET_NODE_INST_DEV_IMPL(LogSoftmax) + +template +void LogSumExp::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs.size() == 1) { + fx.v = xs[0]->v; + } else { + // TODO: Ideally we wouldn't need to allocate this memory permanently. + // We need a good method for allocating "scratch" memory that is only used temporarily. 
+ Tensor ms(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + Eigen::array bcast = {1,fx.d.bd}; + // Calculate the max + if(ms.d.bd == xs[0]->d.bd) + ms.tvec().device(*dev.edevice) = xs[0]->tvec(); + else + ms.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast); + for (size_t i = 1; i < xs.size(); ++i) { + if(ms.d.bd == xs[i]->d.bd) + ms.tvec().device(*dev.edevice) = ms.tvec().cwiseMax(xs[i]->tvec()); + else + ms.tbvec().device(*dev.edevice) = ms.tbvec().cwiseMax(xs[i]->tbvec().broadcast(bcast)); + } + // sumexp + if(ms.d.bd == xs[0]->d.bd) + fx.tvec().device(*dev.edevice) = (xs[0]->tvec() - ms.tvec()).exp(); + else + fx.tbvec().device(*dev.edevice) = (xs[0]->tbvec().broadcast(bcast) - ms.tbvec()).exp(); + for (size_t i = 1; i < xs.size(); ++i) { + if(ms.d.bd == xs[i]->d.bd) + fx.tvec().device(*dev.edevice) += (xs[i]->tvec() - ms.tvec()).exp(); + else + fx.tbvec().device(*dev.edevice) += (xs[i]->tbvec().broadcast(bcast) - ms.tbvec()).exp(); + } + // log and add max + fx.tvec().device(*dev.edevice) = fx.tvec().log() + ms.tvec(); + } +} + +template +void LogSumExp::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if (xs.size() == 1) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + // df/dx_i = 1/{sum_j exp(x_j)} * exp(x_i)} + // = 1/{exp f(x)} * exp(x_i) + // = exp(x_i - f(x)) + if(fx.d.bd == xs[i]->d.bd) { + dEdxi.tvec().device(*dev.edevice) += (xs[i]->tvec() - fx.tvec()).exp() * dEdf.tvec(); + } else { + Eigen::array bcast = {1,fx.d.bd}; + Eigen::array red_axis = {1}; + dEdxi.tvec().device(*dev.edevice) += ((xs[i]->tbvec().broadcast(bcast) - fx.tbvec()).exp() * dEdf.tbvec()).sum(red_axis); + } + } +} +DYNET_NODE_INST_DEV_IMPL(LogSumExp) + +template +void MatrixInverse::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MatrixInverse::forward"); 
+#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("MatrixInverse not yet implemented for CUDA"); +#else + auto x = **xs[0]; + auto y = *fx; + y = x.inverse(); +#endif + // TODO: Change into tensors after resolving test errors + // fx.t<2>().device(*dev.edevice) = xs[0]->t<2>().inverse(); +} + +template +void MatrixInverse::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MatrixInverse::backward"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("MatrixInverse not yet implemented for CUDA"); +#else + auto d = *dEdf; + auto y = *fx; + (*dEdxi) -= y * d * y; +#endif +} +DYNET_NODE_INST_DEV_IMPL(MatrixInverse) + +template +void MatrixMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in MatrixMultiply::forward"); +#ifdef __CUDACC__ + // fx = 0*fx + xs[0] * xs[1] + CUDAMatrixMultiply(dev, *xs[0], *xs[1], fx, kSCALAR_ZERO); +#else + DYNET_ASSERT(fx.d.bd == max(xs[0]->d.bd, xs[1]->d.bd), "Failed dimension check in MatrixMultiply::forward"); + if(xs[0]->d.bd == 1) { + // If the left side has one batch, multiply by columns + // [x, z, b] = [x, y] * [y, z, b] + // -> [x, z*b] = [x, y], [y, z*b] + fx.colbatch_matrix().noalias() = **xs[0] * xs[1]->colbatch_matrix(); + } else { + // Otherwise, loop over the batches + DYNET_ASSERT(xs[1]->d.bd == 1 || xs[1]->d.bd == xs[0]->d.bd, "Failed dimension check in MatrixMultiply::forward"); + for(unsigned b = 0; b < xs[0]->d.bd; ++b) + fx.batch_matrix(b).noalias() = xs[0]->batch_matrix(b) * xs[1]->batch_matrix(b); + } +#endif +} + +template +void MatrixMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in MatrixMultiply::backward"); + int max_b = max(xs[0]->d.bd, xs[1]->d.bd); 
+#if __CUDACC__ + if (i == 0) { + if(dEdxi.d.bd == 1 && (dEdf.d.bd == xs[1]->d.bd)) { + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T, + dEdxi.d.rows(), dEdxi.d.cols(), dEdf.d.cols() * dEdf.d.batch_elems(), + kSCALAR_ONE, + dEdf.v, dEdf.d.rows(), + xs[1]->v, xs[1]->d.rows(), + kSCALAR_ONE, dEdxi.v, dEdxi.d.rows())); + } else { + for(int b = 0; b < max_b; ++b) + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T, + dEdxi.d.rows(), dEdxi.d.cols(), dEdf.d.cols(), + kSCALAR_ONE, + dEdf.batch_ptr(b), dEdf.d.rows(), + xs[1]->batch_ptr(b), xs[1]->d.rows(), + kSCALAR_ONE, dEdxi.batch_ptr(b), dEdxi.d.rows())); + } + } else { + // Do a single multiply if xs[0] has one batch + if(xs[0]->d.bd == 1) { + // dEdxi.colbatch_matrix().noalias() += (**xs[0]).transpose() * dEdf.colbatch_matrix(); + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, + dEdxi.d.rows(), dEdxi.d.cols()*dEdxi.d.batch_elems(), xs[0]->d.rows(), + kSCALAR_ONE, + xs[0]->v, xs[0]->d.rows(), + dEdf.v, dEdf.d.rows(), + kSCALAR_ONE, dEdxi.v, dEdxi.d.rows())); + } else { + for(int b = 0; b < max_b; ++b) + CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, + dEdxi.d.rows(), dEdxi.d.cols(), xs[0]->d.rows(), + kSCALAR_ONE, + xs[0]->batch_ptr(b), xs[0]->d.rows(), + dEdf.batch_ptr(b), dEdf.d.rows(), + kSCALAR_ONE, dEdxi.batch_ptr(b), dEdxi.d.rows())); + } + } +#else + if (i == 0) { + if(dEdxi.d.bd == 1 && (dEdf.d.bd == xs[1]->d.bd)) { + (*dEdxi).noalias() += dEdf.colbatch_matrix() * xs[1]->colbatch_matrix().transpose(); + } else { + for(int b = 0; b < max_b; ++b) + dEdxi.batch_matrix(b).noalias() += dEdf.batch_matrix(b) * xs[1]->batch_matrix(b).transpose(); + } + } else { + if(xs[0]->d.bd == 1) { + dEdxi.colbatch_matrix().noalias() += (**xs[0]).transpose() * dEdf.colbatch_matrix(); + } else { + for(int b = 0; b < max_b; ++b) + dEdxi.batch_matrix(b).noalias() += xs[0]->batch_matrix(b).transpose() * dEdf.batch_matrix(b); + } + } +#endif +} 
+DYNET_NODE_INST_DEV_IMPL(MatrixMultiply) + +template +void Max::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor t(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + t.tvec().device(*dev.edevice) = (xs[0]->tvec() > xs[1]->tvec()).cast(); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMax(xs[1]->tvec()); +} + +template +void Max::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in Max::backward"); + const Tensor t(dEdxi.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) += t.tvec() * dEdf.tvec(); + } else { + dEdxi.tvec().device(*dev.edevice) += t.tvec().binaryExpr(dEdf.tvec(), FMaxBackwardInv()); + } +} +DYNET_NODE_INST_DEV_IMPL(Max) + +template +void NoBackprop::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.d = xs[0]->d; + fx.v = xs[0]->v; +} + +template +void NoBackprop::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // no op +} +DYNET_NODE_INST_DEV_IMPL(NoBackprop) + +template +void FlipGradient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.v = xs[0]->v; +} + +template +void FlipGradient::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // takes negative on backprop + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(FlipGradient) + +template +void MaxPooling1D::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_RUNTIME_ERR("MaxPooling1D::forward_dev_impl not implemented yet"); +#if 0 + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MaxPooling1D::forward"); + const Tensor& x = 
*xs.front(); + const unsigned x_rows = x.rows(); + DYNET_ASSERT(x.cols() == 1, "Failed dimension check in MaxPooling1D::forward"); + const unsigned fx_rows = x_rows / width; + ind.resize(fx_rows); + Tensor fx = Zero(Dim(fx_rows, 1)); + for (unsigned i = 0; i < fx_rows; ++i) { + unsigned from = i * width; + unsigned to = from + width; + if (to > x_rows) to = x_rows; + real best = x(from, 0); + unsigned bestr = from; + for (unsigned r = from + 1; r < to; ++r) { + if (x(r, 0) > best) { + best = x(r,0); + bestr = r; + } + } + ind[i] = bestr; + fx(i, 0) = best; + } + return fx; +#endif +} + +template +void MaxPooling1D::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("MaxPooling1D::backward_dev_impl not implemented yet"); +#if 0 + const Tensor& x = *xs.front(); + const unsigned x_rows = x.rows(); + Tensor dEdx = Zero(Dim(x_rows, 1)); + const unsigned fx_rows = x_rows / width; + DYNET_ASSERT(fx_rows == ind.size(), "Failed dimension check in MaxPooling1D::backward"); + DYNET_ASSERT(fx_rows == dEdf.rows(), "Failed dimension check in MaxPooling1D::backward"); + for (unsigned i = 0; i < fx_rows; ++i) + dEdx(ind[i], 0) = dEdf(i, 0); + return dEdx; +#endif +} +DYNET_NODE_INST_DEV_IMPL(MaxPooling1D) + +template +void Min::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor t(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + t.tvec().device(*dev.edevice) = (xs[0]->tvec() < xs[1]->tvec()).cast(); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMin(xs[1]->tvec()); +} + +template +void Min::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in Min::backward"); + const Tensor t(dEdxi.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + if (i == 0) { + 
dEdxi.tvec().device(*dev.edevice) += t.tvec() * dEdf.tvec(); + } else { + dEdxi.tvec().device(*dev.edevice) += t.tvec().binaryExpr(dEdf.tvec(), FMaxBackwardInv()); + } +} +DYNET_NODE_INST_DEV_IMPL(Min) + +template +void Negate::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in Negate::forward"); + fx.tvec().device(*dev.edevice) = -xs[0]->tvec(); +} + +template +void Negate::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in Negate::backward"); + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Negate) + +template +void PairwiseRankLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FPairwiseRankLoss(margin)); +} + +template +void PairwiseRankLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) -= fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); + } else { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); + } +} +DYNET_NODE_INST_DEV_IMPL(PairwiseRankLoss) + +// x_1 is a vector +// y = (x_1)_{*pval} +template +void PickElement::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if(pval) { + DYNET_ARG_CHECK(*pval < xs[0]->d[dimension], + "PickElement::forward_impl requested element " << *pval << " from a dimension of length " << xs[0]->d[dimension]); + // TODO: This limit of up to 4 is somewhat arbitrary. We need to decide how to handle + // things with "maximum tensor size". 
+ fx.tb<3>().device(*dev.edevice) = xs[0]->tb<4>().chip(*pval, dimension); + } else { + DYNET_ASSERT(pvals != nullptr, "Neither single nor vector of elements available in PickElement::forward"); + DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), + "In PickElement::forward, number of elements in the passed-in index vector (" << pvals->size() << ")" + " did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); + for(unsigned b = 0; b < pvals->size(); ++b) { + DYNET_ARG_CHECK((*pvals)[b] < xs[0]->d[dimension], + "PickElement::forward_impl requested element " << (*pvals)[b] << " from a dimension of length " << xs[0]->d[dimension]); + if(xs[0]->d.bd == 1){ + fx.tb<2>().chip<2>(b).device(*dev.edevice) = xs[0]->t<3>().chip((*pvals)[b], dimension); + }else{ + fx.tb<2>().chip<2>(b).device(*dev.edevice) = xs[0]->tb<3>().chip<3>(b).chip((*pvals)[b], dimension); + } + } + } +} + +// derivative is 0 in all dimensions except 1 for the selected element +template +void PickElement::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in PickElement::backward"); + if(pval) { + dEdxi.tb<3>().chip(*pval, dimension).device(*dev.edevice) += dEdf.tb<2>(); // NOTE(review): forward's single-index path uses tb<4>/tb<3>, this one tb<3>/tb<2> - confirm 4-d inputs are handled + } else { + DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickElement::backward"); // one of pval / pvals has to be set + for(unsigned b = 0; b < pvals->size(); ++b){ + if(xs[0]->d.bd == 1){ + dEdxi.t<3>().chip((*pvals)[b], dimension).device(*dev.edevice) += dEdf.tb<2>().chip<2>(b); + }else{ + dEdxi.tb<3>().chip<3>(b).chip((*pvals)[b], dimension).device(*dev.edevice) += dEdf.tb<2>().chip<2>(b); + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(PickElement) + +template +void PickNegLogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs[0]->d.cols() == 1) { + Tensor z(Dim({1},fx.d.bd), (float*)aux_mem, fx.device, 
DeviceMempool::FXS); + Tensor m(Dim({1},fx.d.bd), (float*)aux_mem + fx.d.bd, fx.device, DeviceMempool::FXS); + unsigned int *ids_dev = (unsigned int*)((float*)aux_mem + 2*fx.d.bd), *ids_host; +#if __CUDACC__ + ids_host = (unsigned int*)malloc(fx.d.bd * sizeof(unsigned int)); +#else + ids_host = ids_dev; +#endif + if(pval) { + *ids_host = *pval; + } else { + DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickNegLogSoftmax::forward"); + DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), + "In PickNegLogSoftmax::forward, number of elements in the passed-in index vector (" << pvals->size() << ")" + " did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); + size_t batch_size = xs[0]->d.batch_size(); + for(unsigned b = 0; b < fx.d.bd; ++b) + ids_host[b] = batch_size * b + (*pvals)[b]; + } +#if __CUDACC__ + CUDA_CHECK(cudaMemcpyAsync(ids_dev, ids_host, fx.d.bd * sizeof(unsigned int), cudaMemcpyHostToDevice)); + logsumexp(dev, *xs[0], m, z); + dynet::gpu::sparse_to_dense_assign(fx.d.bd, ids_dev, xs[0]->v, fx.v); + free(ids_host); +#else + logsumexp(dev, *xs[0], m, z); + for(unsigned b = 0; b < fx.d.bd; ++b) + fx.v[b] = xs[0]->v[ids_dev[b]]; +#endif + fx.tvec().device(*dev.edevice) = z.tvec() - fx.tvec(); + } else { + DYNET_RUNTIME_ERR("PickNegLogSoftmax::forward not yet implemented for multiple columns"); + } +} + +template +void PickNegLogSoftmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if (xs[0]->d.cols() == 1) { + Tensor z(Dim({1},fx.d.batch_elems()), (float*)aux_mem, fx.device, DeviceMempool::FXS); + unsigned int *ids_dev = (unsigned int*)((float*)aux_mem + 2*fx.d.bd); +#if __CUDACC__ + Eigen::array bcast({(int)xs[0]->d[0],1}); + dEdxi.tb<1>().device(*dev.edevice) += (xs[0]->tb<1>() - z.tb<1>().broadcast(bcast)).exp() * dEdf.tb<1>().broadcast(bcast); + 
dynet::gpu::dense_to_sparse_subtract(fx.d.bd, ids_dev, dEdf.v, dEdxi.v); +#else + // TODO: We want to do broadcasting here too, but it's slow + for(unsigned b = 0; b < fx.d.bd; ++b) { + dEdxi.tb<1>().chip<1>(b).device(*dev.edevice) += (xs[0]->tb<1>().chip<1>(b) - z.v[b]).exp() * dEdf.v[b]; + dEdxi.v[ids_dev[b]] -= dEdf.v[b]; + } +#endif + } else { + DYNET_RUNTIME_ERR("PickNegLogSoftmax::backward not yet implemented for multiple columns"); + } +} +DYNET_NODE_INST_DEV_IMPL(PickNegLogSoftmax) + +// x_1 is a matrix +// y = (x_1)[start:end] +// slice of matrix from index start (inclusive) to index end (exclusive) +template +void PickRange::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Eigen::DSizes indices(0,0,0,0,0); + indices[dim] = start; + Eigen::DSizes sizes(static_cast(fx.d[0]), + static_cast(fx.d[1]), + static_cast(fx.d[2]), + static_cast(fx.d[3]), + static_cast(fx.d.bd)); + sizes[dim] = end-start; + fx.tb<4>().device(*dev.edevice) = xs[0]->tb<4>().slice(indices, sizes); +} + +// derivative is 0 in all dimensions except the slice range +template +void PickRange::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Eigen::DSizes indices(0,0,0,0,0); + indices[dim] = start; + Eigen::DSizes sizes(static_cast(fx.d[0]), + static_cast(fx.d[1]), + static_cast(fx.d[2]), + static_cast(fx.d[3]), + static_cast(fx.d.bd)); + sizes[dim] = end-start; + dEdxi.tb<4>().slice(indices, sizes).device(*dev.edevice) += dEdf.tb<4>(); +} +DYNET_NODE_INST_DEV_IMPL(PickRange) + +template +void PickBatchElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (pval) { + fx.tvec().device(*dev.edevice) = xs[0]->tbvec().chip<1>(*pval); + } else { + DYNET_ASSERT(pvals != nullptr, "Neither single nor vector of elements available in PickBatchElements::forward"); + DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), + "In 
PickBatchElements::forward, number of elements in the passed-in index vector (" << pvals->size() << ") " + "did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); + for (unsigned b = 0; b < pvals->size(); ++b) { + DYNET_ARG_CHECK((*pvals)[b] < xs[0]->d.bd, + "PickBatchElements::forward_impl requested element " << (*pvals)[b] << " from a batch size of " << xs[0]->d.bd); + fx.tbvec().chip<1>(b).device(*dev.edevice) = xs[0]->tbvec().chip<1>((*pvals)[b]); + } + } +} + +template +void PickBatchElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in PickBatchElements::backward"); + if (pval) { + dEdxi.tbvec().chip<1>(*pval).device(*dev.edevice) += dEdf.tvec(); + } else { + DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickBatchElements::backward"); + for (unsigned b = 0; b < pvals->size(); ++b) + dEdxi.tbvec().chip<1>((*pvals)[b]).device(*dev.edevice) += dEdf.tbvec().chip<1>(b); + } +} +DYNET_NODE_INST_DEV_IMPL(PickBatchElements) + +template +void PoissonRegressionLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const real y = *pty; + const auto z = std::lgamma(y + 1); + // const auto x = as_scalar(*xs[0]); + fx.t<0>().device(*dev.edevice) = xs[0]->t<0>().exp() + z - xs[0]->t<0>() * y; +} + +template +void PoissonRegressionLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const real y = *pty; + dEdxi.t<0>().device(*dev.edevice) += xs[0]->t<0>().exp() - y; +} +DYNET_NODE_INST_DEV_IMPL(PoissonRegressionLoss) + +template +void Pow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed dimension check in Pow::forward"); + fx.tvec().device(*dev.edevice) = 
xs[0]->tvec().pow(as_scalar(*xs[1])); +} + +template +void Pow::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed dimension check in Pow::backward"); + real x2 = as_scalar(*xs[1]); + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().pow(x2 - 1) * dEdf.tvec() * x2; + } else { +#if defined(__CUDACC__) && defined(EIGEN_NO_MALLOC) + DYNET_RUNTIME_ERR("CUDA memory allocation in Pow"); +#endif + // y = a^x + // dy/dx = a^x * log(a) + dEdxi.t<0>().device(*dev.edevice) += (fx.tvec() * xs[0]->tvec().log() * dEdf.tvec()).sum(); + } +} +DYNET_NODE_INST_DEV_IMPL(Pow) + +template +void Rectify::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in Rectify::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMax(0.f); +} + +template +void Rectify::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); +} +DYNET_NODE_INST_DEV_IMPL(Rectify) + +template +void Reshape::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + // just point to the input memory and change dimensions + // dimensions are handled by forward_dim + fx.v = xs[0]->v; +} + +template +void Reshape::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const Tensor reshaped(dEdxi.d, dEdf.v, dEdxi.device, dEdf.mem_pool); + dEdxi.tvec().device(*dev.edevice) += reshaped.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Reshape) + +template +void RestrictedLogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in 
RestrictedLogSoftmax"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("RestrictedLogSoftmax not yet implemented for CUDA (contributions welcome!)"); +#else + // TODO create auxiliary mask with -infty's + // and do usual LogSoftmax stuff + if(denom.size() == 0) + DYNET_INVALID_ARG("Number of elements in denominator of RestrictedLogSoftmax::forward must be zero"); + auto x = **xs[0]; + if(denom.size() == 0) + DYNET_RUNTIME_ERR("RestrictedLogSoftmax currently only supports single column expressions (contributions expanding support to multiple columns welcome!)"); + const real logz = logsumexp(x, denom); + TensorTools::constant(fx, -numeric_limits::infinity()); + for (auto i : denom) + (*fx)(i,0) = x(i,0) - logz; + if (denom.size() == 1) (*fx)(denom.front(), 0) = 0; +#endif +} + +template +void RestrictedLogSoftmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in RestrictedLogSoftmax"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("RestrictedLogSoftmax not yet implemented for CUDA (contributions welcome!)"); +#else + float z = 0; + for (auto ind : denom) + z += (*dEdf)(ind, 0); + for (auto ind : denom) + (*dEdxi)(ind, 0) += (*dEdf)(ind, 0) - expf((*fx)(ind, 0)) * z; +#endif +} +DYNET_NODE_INST_DEV_IMPL(RestrictedLogSoftmax) + +template +void SelectCols::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectCols::forward"); + auto& rm = *pcols; + for (unsigned i = 0; i < rm.size(); ++i) { + DYNET_ARG_CHECK(rm[i] < xs[0]->d.cols(), + "Out-of-bounds index " << rm[i] << " in SelectCols over expression of dimensions " << xs[0]->d); + fx.t<2>().chip<1>(i).device(*dev.edevice) = xs[0]->t<2>().chip<1>(rm[i]); + } +} + +template +void SelectCols::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + 
Tensor& dEdxi) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectCols::backward"); + auto& rm = *pcols; + for (unsigned i = 0; i < rm.size(); ++i) + dEdxi.t<2>().chip<1>(rm[i]).device(*dev.edevice) += dEdf.t<2>().chip<1>(i); +} +DYNET_NODE_INST_DEV_IMPL(SelectCols) + +template +void SelectRows::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectRows::forward"); + auto& rm = *prows; + for (unsigned i = 0; i < rm.size(); ++i) { + DYNET_ARG_CHECK(rm[i] < xs[0]->d.rows(), + "Out-of-bounds index " << rm[i] << " in SelectRows over expression of dimensions " << xs[0]->d); + fx.t<2>().chip<0>(i).device(*dev.edevice) = xs[0]->t<2>().chip<0>(rm[i]); + } +} + +template +void SelectRows::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectRows::backward"); + auto& rm = *prows; + for (unsigned i = 0; i < rm.size(); ++i) + dEdxi.t<2>().chip<0>(rm[i]).device(*dev.edevice) += dEdf.t<2>().chip<0>(i); +} +DYNET_NODE_INST_DEV_IMPL(SelectRows) + +template +void Softmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in Softmax::forward"); + Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + Tensor m(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem + z.d.size(), fx.device, DeviceMempool::FXS); + logsumexp(dev, *xs[0], m, z); + // TODO? Is this broadcast efficient on CPU? 
+ Eigen::array bcasts = {(int)xs[0]->d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + fx.tb<2>().device(*dev.edevice) = (xs[0]->tb<2>() - z.tvec().reshape(morph).broadcast(bcasts)).exp(); +} + +template +void Softmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Tensor z(Dim({fx.d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + // TODO? Is this broadcast efficient on CPU? + Eigen::array red_axis = {0}; + z.tb<1>().device(*dev.edevice) = (fx.tb<2>() * dEdf.tb<2>()).sum(red_axis); + Eigen::array bcast = {(int)xs[0]->d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + dEdxi.tb<2>().device(*dev.edevice) += (dEdf.tb<2>() - z.tvec().reshape(morph).broadcast(bcast)) * fx.tb<2>(); +} +DYNET_NODE_INST_DEV_IMPL(Softmax) + +template +void SoftSign::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SoftSign::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(FSoftSign()); +} + +template +void SoftSign::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FSoftSignBackward()); +} +DYNET_NODE_INST_DEV_IMPL(SoftSign) + +template +void Sparsemax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs[0]->d.cols() == 1) { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("Sparsemax not implemented for CUDA"); +#else + const unsigned rows = xs[0]->d.rows(); + float *zs = static_cast(aux_mem); + std::partial_sort_copy(xs[0]->v, xs[0]->v+rows, zs, zs + rows, std::greater()); + float sum = 0, maxsum = 0; + unsigned k = 0; + for (k = 0; k < rows; ++k) { + sum += zs[k]; + float t = 1 + (k + 1) * zs[k]; + if (t <= sum) break; + maxsum = sum; + } 
+ float tau = (maxsum - 1) / k; + auto y = *fx; + fx.tvec() = (xs[0]->tvec() - tau).cwiseMax(0.f); + int c = 1; + int *cc = static_cast(aux_mem); + for (unsigned i = 0; i < rows; ++i) + if (y(i,0) > 0.f) cc[c++] = i; + cc[0] = c - 1; +#endif + } else { + DYNET_RUNTIME_ERR("Sparsemax not yet implemented for multiple columns"); + } +} + +template +void Sparsemax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("Sparsemax not implemented for CUDA"); +#else + const int ssize = static_cast(aux_mem)[0]; + int *support = static_cast(aux_mem) + 1; + float dhat = 0; + auto& d = *dEdf; + for (int i = 0; i < ssize; ++i) + dhat += d(support[i], 0); + dhat /= ssize; + for (int i = 0; i < ssize; ++i) + (*dEdxi)(support[i], 0) += d(support[i], 0) - dhat; +#endif +} +DYNET_NODE_INST_DEV_IMPL(Sparsemax) + +template +void SparsemaxLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs[0]->d.cols() == 1) { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("SparsemaxLoss not implemented for CUDA"); +#else + const int rows = xs[0]->d.rows(); + if (rows > MAX_SPARSEMAX_LOSS_ROWS) + DYNET_RUNTIME_ERR("MAX_SPARSEMAX_LOSS_ROWS is not sufficient. 
Recompile with larger value."); + const unsigned qsupport_size = pq->size(); + const float qprop = 1.f / qsupport_size; + + float *zs = static_cast(aux_mem); + std::partial_sort_copy(xs[0]->v, xs[0]->v+rows, zs, zs + rows, std::greater()); + float sum = 0, maxsum = 0; + int k = 0; + for (k = 0; k < rows; ++k) { + sum += zs[k]; + float t = 1 + (k + 1) * zs[k]; + if (t <= sum) break; + maxsum = sum; + } + float tau = (maxsum - 1) / k; + Tensor tsm(xs[0]->d, (float*)aux_mem, xs[0]->device, DeviceMempool::FXS); + tsm.t<1>() = (xs[0]->t<1>() - tau).cwiseMax(0.f); + fx.t<0>() = ( (tsm.t<1>() != 0.f).cast() * (xs[0]->t<1>().square() - (tau * tau)) ).sum(); + fx.t<0>() = ( fx.t<0>() + qprop * qprop * qsupport_size ) / 2.f; + for (unsigned i = 0; i < qsupport_size; ++i) + fx.t<0>() = fx.t<0>() - xs[0]->t<1>().chip<0>((*pq)[i]) * qprop; + fx.t<0>() = fx.t<0>().cwiseMax(0.f); +#endif + } else { + DYNET_RUNTIME_ERR("SparsemaxLoss not yet implemented for multiple columns"); + } +} + +template +void SparsemaxLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("SparsemaxLoss not implemented for CUDA"); +#else + const float d = dEdf.v[0]; + float* psm = static_cast(aux_mem); + float dqprop = d / pq->size(); + Tensor tsm(xs[0]->d, psm, xs[0]->device, DeviceMempool::FXS); + auto sm = *tsm; // sparsemax(z) + *dEdxi += sm * d; + for (unsigned i = 0; i < pq->size(); ++i) + (*dEdxi)((*pq)[i], 0) -= dqprop; +#endif +} +DYNET_NODE_INST_DEV_IMPL(SparsemaxLoss) + +template +void Square::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().square(); +} + +template +void Square::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * xs[0]->tvec() * 2.f; 
+} +DYNET_NODE_INST_DEV_IMPL(Square) + +template +void SquaredEuclideanDistance::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in SquaredEuclideanDistance::forward"); + Eigen::array red_axis = {0}; + if(xs[0]->d.bd == xs[1]->d.bd) { + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec() - xs[1]->tbvec()).square().sum(red_axis); + } else if(xs[0]->d.bd == 1) { + Eigen::array bcast = {1, xs[1]->d.bd}; + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec().broadcast(bcast) - xs[1]->tbvec()).square().sum(red_axis); + } else { + Eigen::array bcast = {1, xs[0]->d.bd}; + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec() - xs[1]->tbvec().broadcast(bcast)).square().sum(red_axis); + } +} + +template +void SquaredEuclideanDistance::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in SquaredEuclideanDistance::backward"); + float multiplier = (i == 1 ? 
-2.0f : 2.0f); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tbvec().device(*dev.edevice) += (xs[0]->tbvec() - xs[1]->tbvec()) * dEdf.tbvec().broadcast(bcast) * multiplier; + } else if(xs[0]->d.bd == 1) { + Eigen::array batchcast = {1, xs[1]->d.bd}; + if(i == 1) { + dEdxi.tbvec().device(*dev.edevice) += (xs[0]->tbvec().broadcast(batchcast) - xs[1]->tbvec()) * dEdf.tbvec().broadcast(bcast) * multiplier; + } else { + Eigen::array red_axis = {1}; + dEdxi.tvec().device(*dev.edevice) += ((xs[0]->tbvec().broadcast(batchcast) - xs[1]->tbvec()) * dEdf.tbvec().broadcast(bcast) * multiplier).sum(red_axis); + } + } else { + Eigen::array batchcast = {1, xs[0]->d.bd}; + if(i == 0) { + dEdxi.tbvec().device(*dev.edevice) += (xs[0]->tbvec() - xs[1]->tbvec().broadcast(batchcast)) * dEdf.tbvec().broadcast(bcast) * multiplier; + } else { + Eigen::array red_axis = {1}; + dEdxi.tvec().device(*dev.edevice) += ((xs[0]->tbvec() - xs[1]->tbvec().broadcast(batchcast)) * dEdf.tbvec().broadcast(bcast) * multiplier).sum(red_axis); + } + } +} +DYNET_NODE_INST_DEV_IMPL(SquaredEuclideanDistance) + +template +void SquaredNorm::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in SquaredNorm::forward"); + Eigen::array red_axis = {0}; + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().square().sum(red_axis); +} + +template +void SquaredNorm::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 1, "Failed dimension check in SquaredNorm::backward"); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + dEdxi.tbvec().device(*dev.edevice) += xs[0]->tbvec() * dEdf.tbvec().broadcast(bcast) * 2.0f; +} +DYNET_NODE_INST_DEV_IMPL(SquaredNorm) + +template +void L2Norm::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, 
"Failed dimension check in L2Norm::forward"); + Eigen::array red_axis = {0}; + fx.tb<0>().device(*dev.edevice) = (xs[0]->tbvec().square().sum(red_axis) / (float) xs[0]->d.batch_size()).sqrt() ; +} + +template +void L2Norm::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 1, "Failed dimension check in L2Norm::backward"); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + dEdxi.tbvec().device(*dev.edevice) += xs[0]->tbvec() * ((fx.tvec() / (float) xs[0]->d.batch_size()).binaryExpr(dEdf.tvec(), FSqrtBackward())).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(L2Norm) + +template +void Sqrt::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().sqrt(); +} + +template +void Sqrt::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FSqrtBackward()); +} +DYNET_NODE_INST_DEV_IMPL(Sqrt) + +template +void Abs::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().abs(); +} + +template +void Abs::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * xs[0]->tvec().sign(); +} +DYNET_NODE_INST_DEV_IMPL(Abs) + +template +void Sum::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const unsigned num_args = xs.size(); + if (num_args == 1) + fx.v = xs[0]->v; + else if (num_args == 2 && xs[0]->d.bd == xs[1]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec(); + else if (num_args == 3 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd) + fx.tvec().device(*dev.edevice) = 
xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec(); + else if (num_args == 4 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd && xs[2]->d.bd == xs[3]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); + else { + bool allSameBatchSize = std::all_of(xs.begin(), xs.end(), [&](const Tensor* x) { return x->d.bd == xs[0]->d.bd;}); + if (allSameBatchSize) { + // Since they are all the same batch size, we can easily unroll the addition (results in lower GPU latency by merging multiple adds together in one CUDA call): + DYNET_ASSERT(num_args > 4, "Bad loop unrolling in Sum::forward"); // If it was <=4, we would have handled it in the special cases above + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); + + const unsigned remainder = (num_args - 4 ) % 4; + switch (remainder) { + case 0: break; + case 1: fx.tvec().device(*dev.edevice) += xs[4]->tvec(); break; + case 2: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec(); break; + case 3: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec() + xs[6]->tvec(); break; + } + for (unsigned i = 4 + remainder; i < num_args; i += 4) + fx.tvec().device(*dev.edevice) += xs[i]->tvec() + xs[i + 1]->tvec() + xs[i + 2]->tvec() + xs[i + 3]->tvec(); + } + else { + // Not all the same batch size, so need to broadcast in the cases where they differ + TensorTools::zero(fx); +#if __CUDACC__ + Eigen::array bcast({ 1, (int)fx.d.bd }); +#endif + for (unsigned i = 0; i < num_args; ++i) { + if (xs[i]->d.bd == fx.d.bd) { + fx.tvec().device(*dev.edevice) += xs[i]->tvec(); + } + else { +#if __CUDACC__ + fx.tbvec().device(*dev.edevice) += xs[i]->tbvec().broadcast(bcast); +#else + for (unsigned b = 0; b < fx.d.bd; ++b) + fx.tbvec().chip<1>(b).device(*dev.edevice) += xs[i]->tvec(); +#endif + } + } + } + } +} + +template +void Sum::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& 
dEdf, + unsigned i, + Tensor& dEdxi) const { + if(dEdxi.d.bd == fx.d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + Eigen::array red_axis = {1}; + dEdxi.tvec().device(*dev.edevice) += dEdf.tbvec().sum(red_axis); + } +} +DYNET_NODE_INST_DEV_IMPL(Sum) + +template +void SumElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SumElements::forward"); + Eigen::array red_axis; red_axis[0] = 0; + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis); +} + +template +void SumElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in SumElements::backward"); + Eigen::array bcast = {(int)xs[0]->d.batch_size(), 1}; + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(SumElements) + +template +void MomentElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in MomentElements::forward"); + Eigen::array red_axis; red_axis[0] = 0; + if(order == 1) + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis) / (float) xs[0]->d.batch_size(); + else if (order == 2) + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().square().sum(red_axis) / (float) xs[0]->d.batch_size(); + else + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().pow(order).sum(red_axis) / (float) xs[0]->d.batch_size(); +} + +template +void MomentElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentElements::backward"); + Eigen::array bcast = {(int)xs[0]->d.batch_size(), 1}; + if (order == 1) + dEdxi.tbvec().device(*dev.edevice) += 
dEdf.tbvec().broadcast(bcast) / (float) xs[0]->d.batch_size(); + else if (order == 2) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec()) * ( 2.f / (float) xs[0]->d.batch_size()); + else if (order == 3) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().square()) * ( 3.f / (float) xs[0]->d.batch_size()); + else + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().pow(order - 1)) * ( (float) order / (float) xs[0]->d.batch_size()); +} +DYNET_NODE_INST_DEV_IMPL(MomentElements) + + +template +void StdElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in StdElements::forward"); + Eigen::array red_axis = {0}; + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + Eigen::array newaxis = {1, xs[0]->d.bd}; + float n = (float) xs[0]->d.batch_size(); + fx.tb<0>().device(*dev.edevice) = ((xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); +} + +template +void StdElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 1, "Failed dimension check in StdElements::backward"); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + Eigen::array newaxis = {1, xs[0]->d.bd}; + Eigen::array red_axis = {0}; + float n = (float) xs[0]->d.batch_size(); + dEdxi.tbvec().device(*dev.edevice) += (2 / n) * (xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)) * (fx.tbvec().binaryExpr(dEdf.tbvec(), FSqrtBackward())).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(StdElements) + +template +void MomentBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in MomentBatches::forward"); + Eigen::array 
red_axis; red_axis[0] = 1; + if(order == 1) + fx.t<1>().device(*dev.edevice) = xs[0]->tb<1>().sum(red_axis) / (float) xs[0]->d.bd; + else if (order == 2) + fx.t<1>().device(*dev.edevice) = xs[0]->tb<1>().square().sum(red_axis) / (float) xs[0]->d.bd; + else + fx.t<1>().device(*dev.edevice) = xs[0]->tb<1>().pow(order).sum(red_axis) / (float) xs[0]->d.bd; +} + +template +void MomentBatches::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentBatches::backward"); + Eigen::array bcast = {1, (int)xs[0]->d.bd}; + if (order == 1) + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast) / (float) xs[0]->d.bd; + else if (order == 2) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec()) * ( 2.f / (float) xs[0]->d.bd); + else if (order == 3) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().square()) * ( 3.f / (float) xs[0]->d.bd); + else + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().pow(order - 1)) * ( (float) order / (float) xs[0]->d.bd); +} +DYNET_NODE_INST_DEV_IMPL(MomentBatches) + +template +void MomentDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); + Eigen::array reduction_axis = {(int)dimension}; + float n = (float) xs[0]->d[dimension]; + if(order == 1) + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().sum(reduction_axis) / n; + else if (order == 2) + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().square().sum(reduction_axis) / n; + else + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().pow(order).sum(reduction_axis) / n; +} + +template +void MomentDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + 
Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentDimension::backward"); + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; + Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; + float n = (float) xs[0]->d[dimension]; + if (order == 1) + dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<2>().reshape(morph).broadcast(bcast) / n; + else if (order == 2) + dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>()) * ( 2.f / n); + else if (order == 3) + dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>().square()) * ( 3.f / n); + else + dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>().pow(order - 1)) * ( (float) order / n); +} +DYNET_NODE_INST_DEV_IMPL(MomentDimension) + +template +void StdDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); + Eigen::array red_axis = {(int)dimension}; + Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; + float n = (float) xs[0]->d[dimension]; + fx.tb<2>().device(*dev.edevice) = ((xs[0]->tb<3>() - (xs[0]->tb<3>().sum(red_axis).reshape(morph) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); +} + +template +void StdDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in StdDimension::backward"); + Eigen::array red_axis = {(int)dimension}; + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; + Eigen::array morph = 
{(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; + float n = (float) xs[0]->d[dimension]; + dEdxi.tb<3>().device(*dev.edevice) += (2 / n) * (xs[0]->tb<3>() - (xs[0]->tb<3>().sum(red_axis).reshape(morph) / n).broadcast(bcast)) * (fx.tb<2>().binaryExpr(dEdf.tb<2>(), FSqrtBackward())).reshape(morph).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(StdDimension) + + +template +void StdBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in StdBatches::forward"); + Eigen::array red_axis = {1}; + Eigen::array newaxis = {xs[0]->d.batch_size(), 1}; + Eigen::array bcast = {1, xs[0]->d.bd}; + float n = (float)xs[0]->d.bd; + fx.t<1>().device(*dev.edevice) = ((xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); +} + +template +void StdBatches::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 1, "Failed dimension check in StdBatches::backward"); + Eigen::array red_axis = {1}; + Eigen::array bcast = {1, xs[0]->d.bd}; + Eigen::array newaxis = {xs[0]->d.batch_size(), 1}; + float n = (float)xs[0]->d.bd; + dEdxi.tbvec().device(*dev.edevice) += (2 / n) * (xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)) * (fx.tbvec().binaryExpr(dEdf.tbvec(), FSqrtBackward())).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(StdBatches) + + +template +void SumBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SumBatches::forward"); + unsigned num_args = xs[0]->d.bd; +#ifdef __CUDACC__ + Eigen::array red_axis; red_axis[0] = 2; + fx.t<2>().device(*dev.edevice) = xs[0]->tb<2>().sum(red_axis); +#else + // TODO: Is this CPU version really good? 
Overhead can probably be reduced. + auto res = *fx; + const unsigned remainder = num_args % 4; + switch (remainder) { + case 0: res.setZero(); break; + case 1: res = xs[0]->batch_matrix(0); break; + case 2: res = xs[0]->batch_matrix(0) + xs[0]->batch_matrix(1); break; + case 3: res = xs[0]->batch_matrix(0) + xs[0]->batch_matrix(1) + xs[0]->batch_matrix(2); break; + } + for (unsigned i = remainder; i < num_args; i += 4) + res += xs[0]->batch_matrix(i) + xs[0]->batch_matrix(i+1) + xs[0]->batch_matrix(i+2) + xs[0]->batch_matrix(i+3); +#endif +} + +template +void SumBatches::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in SumBatches::backward"); +#if __CUDACC__ + Eigen::array bcast({1, 1, (int)fx.d.bd}); + dEdxi.tb<2>().device(*dev.edevice) += dEdf.tb<2>().broadcast(bcast); +#else + for (unsigned i = 0; i < dEdxi.d.bd; ++i) + dEdxi.batch_matrix(i) += *dEdf; +#endif +} +DYNET_NODE_INST_DEV_IMPL(SumBatches) + +template +void TraceOfProduct::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("TraceOfProduct not yet implemented for CUDA"); +#else + auto x1 = **xs[0]; + auto x2 = **xs[1]; + fx.v[0] = (x1 * x2.transpose()).trace(); +#endif +} + +template +void TraceOfProduct::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i < 2, "Failed dimension check in TraceOfProduce::backward"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("TraceOfProduct not yet implemented for CUDA"); +#else + const float d = dEdf.v[0]; + auto xother = **xs[1 - i]; + *dEdxi += d * xother; +#endif +} +DYNET_NODE_INST_DEV_IMPL(TraceOfProduct) + +template +void Tanh::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = 
xs[0]->tvec().tanh(); +} + +template +void Tanh::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), scalar_tanh_backward_op()); +} +DYNET_NODE_INST_DEV_IMPL(Tanh) + +template +void Transpose::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (dim.num_nonone_dims() <= 1) { + fx.v = xs[0]->v; + } else { + array order; + for(size_t i = 0; i < 5; ++i) + order[i] = (i >= dims.size() ? i : dims[i]); + fx.tb<4>().device(*dev.edevice) = xs[0]->tb<4>().shuffle(order); + } +} + +template +void Transpose::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + array order; + for(size_t i = 0; i < 5; ++i) + order[(i >= dims.size() ? i : dims[i])] = i; + dEdxi.tb<4>().device(*dev.edevice) += dEdf.tb<4>().shuffle(order); +} +DYNET_NODE_INST_DEV_IMPL(Transpose) + +template +void Zeroes::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in Zeroes::forward"); + TensorTools::zero(fx); +} + +template +void Zeroes::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(Zeroes) + +template +void RandomNormal::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomNormal::forward"); + TensorTools::randomize_normal(fx); +} + +template +void RandomNormal::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} 
+DYNET_NODE_INST_DEV_IMPL(RandomNormal) + +template +void RandomBernoulli::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomBernoulli::forward"); + TensorTools::randomize_bernoulli(fx, p, scale); +} + +template +void RandomBernoulli::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomBernoulli) + +template +void RandomUniform::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomUniform::forward"); + TensorTools::randomize_uniform(fx, left, right); +} + +template +void RandomUniform::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomUniform) + +template +void RandomGumbel::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomGumbel::forward"); + DYNET_ARG_CHECK(mu == 0.0 && beta == 1.0, "RandomGumbel only supports Gumbel(0,1) at the moment (pull requests welcome)"); + TensorTools::randomize_uniform(fx, 0, 1); + fx.tvec().device(*dev.edevice) = -(-fx.tvec().log()).log(); +} + +template +void RandomGumbel::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomGumbel) + +template +void MaxDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Eigen::DenseIndex* maxmap = static_cast(aux_mem); + const unsigned batch_size = 
dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(maxmap, first_dim_size, second_dim_size, batch_size); + const Eigen::array reduction_axis = {reduced_dim}; + locs.device(*dev.edevice) = xs[0]->tb<3>().argmax(reduced_dim); + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().maximum(reduction_axis); +} + +template +void MaxDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MaxDimension::backward"); +#ifdef __CUDACC__ + vector indices(dim.size()); + Eigen::DenseIndex* maxmap = &indices[0]; + CUDA_CHECK(cudaMemcpy((void*)maxmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); +#else + Eigen::DenseIndex* maxmap = static_cast(aux_mem); +#endif + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(maxmap, first_dim_size, second_dim_size, batch_size); + for(unsigned b = 0; b < batch_size; ++b){ + for(unsigned j = 0; j < second_dim_size; ++j){ + for(unsigned i = 0; i < first_dim_size; ++i){ + if (reduced_dim > second_dim) + dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, b), reduced_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else if (reduced_dim > first_dim) + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, j, b), reduced_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(locs(i, j, b), reduced_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(MaxDimension) + +template +void MinDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, 
Tensor& fx) const { + Eigen::DenseIndex* minmap = static_cast(aux_mem); + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(minmap, first_dim_size, second_dim_size, batch_size); + const Eigen::array reduction_axis = {reduced_dim}; + locs.device(*dev.edevice) = xs[0]->tb<3>().argmin(reduced_dim); + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().minimum(reduction_axis); +} + +template +void MinDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MinDimension::backward"); +#ifdef __CUDACC__ + vector indices(dim.size()); + Eigen::DenseIndex* minmap = &indices[0]; + CUDA_CHECK(cudaMemcpy((void*)minmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); +#else + Eigen::DenseIndex* minmap = static_cast(aux_mem); +#endif + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(minmap, first_dim_size, second_dim_size, batch_size); + for(unsigned b = 0; b < batch_size; ++b){ + for(unsigned j = 0; j < second_dim_size; ++j){ + for(unsigned i = 0; i < first_dim_size; ++i){ + if (reduced_dim > second_dim) + dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, b), reduced_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else if (reduced_dim > first_dim) + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, j, b), reduced_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(locs(i, j, b), reduced_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + } + } + } +} 
+DYNET_NODE_INST_DEV_IMPL(MinDimension) + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/nodes.h b/thirdparty/dynet/dynet/nodes.h new file mode 100644 index 000000000..43b3b7fd0 --- /dev/null +++ b/thirdparty/dynet/dynet/nodes.h @@ -0,0 +1,733 @@ +#ifndef DYNET_NODES_H_ +#define DYNET_NODES_H_ + +#include "dynet/dynet.h" +#include "dynet/devices.h" +#include "dynet/nodes-macros.h" + +// See nodes-macros.h for more details about DYNET_NODE_DEFINE_DEV_IMPL(). + +namespace dynet { + +// M = x_0, v = x_1 +// y = M + v (broadcasting over columns) +struct AddVectorToAllColumns : public Node { + explicit AddVectorToAllColumns(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = L_sparsemax(x_0; q) +// where x_0 is a vector of "unnormalized" probabilities +// q are the vector of labels +struct SparsemaxLoss : public Node { + explicit SparsemaxLoss(const std::initializer_list& a, const std::vector& target) : Node(a), q(target), pq(&q) {} + explicit SparsemaxLoss(const std::initializer_list& a, const std::vector* ptarget) : Node(a), q(), pq(ptarget) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + const std::vector q; + const std::vector* pq; +}; + +// y = sparsemax(x) +// y = arg min_y ||y - x||^2 +struct Sparsemax : public Node { + explicit Sparsemax(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; +}; + +// y = inv(x) +// x = an invertible matrix +struct MatrixInverse : public Node { + explicit MatrixInverse(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = select_rows(x, rows) +// x = a matrix +struct SelectRows : public Node { + explicit SelectRows(const std::initializer_list& a, const std::vector& r) : Node(a), rows(r), prows(&rows) {} + explicit SelectRows(const std::initializer_list& a, const std::vector* pr) : 
Node(a), prows(pr) {} + DYNET_NODE_DEFINE_DEV_IMPL() + std::vector rows; + const std::vector* prows; +}; + +// y = select_cols(x, cols) +// x = a matrix +struct SelectCols : public Node { + explicit SelectCols(const std::initializer_list& a, const std::vector& c) : Node(a), cols(c), pcols(&cols) {} + explicit SelectCols(const std::initializer_list& a, const std::vector* pc) : Node(a), pcols(pc) {} + DYNET_NODE_DEFINE_DEV_IMPL() + std::vector cols; + const std::vector* pcols; +}; + +// y = pow(x_1, x_2) +// x_2 raise every element in x_1 to the power of scalar x_2 +struct Pow : public Node { + explicit Pow(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = min{x_1, x_2} +struct Min : public Node { + explicit Min(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; +}; + +// y = max{x_1, x_2} +struct Max : public Node { + template explicit Max(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; +}; + +// y = Tr(x_1 * x_2^T) +struct TraceOfProduct : public Node { + explicit TraceOfProduct(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = alpha * x_1 +struct ConstScalarMultiply : public Node { + explicit ConstScalarMultiply(const std::initializer_list& a, float alpha) : Node(a), alpha(alpha) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + float alpha; +}; + +// y = x_1^T . 
x_2 +struct DotProduct : public Node { + explicit DotProduct(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1^T +// NOTE: if you have a column or row vector as input, runtime is constant +// if you have a matrix as input, the runtime is O(mn) - try to avoid using this +struct Transpose : public Node { + explicit Transpose(const std::initializer_list& a, const std::vector & dims) : Node(a), dims(dims) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + std::vector dims; +}; + +// y = reshape(x_1, --> to) +struct Reshape : public Node { + explicit Reshape(const std::initializer_list& a, const Dim& to) : Node(a), to(to) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + Dim to; +}; + +// y_i = \sum_{j=1}^n x_1:{i-1+j} +struct KMHNGram : public Node { + explicit KMHNGram(const std::initializer_list& a, unsigned n) : Node(a), n(n) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned n; // width, n=2 for Karl's paper +}; + +// n_{i,j} ~ N(0,stddev) +// y = x + n +struct GaussianNoise : public Node { + explicit GaussianNoise(const std::initializer_list& a, real stddev) : Node(a), stddev(stddev) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + real stddev; +}; + +// y = dropout(x,p) where p specifies the dropout probability +struct Dropout : public Node { + explicit Dropout(const std::initializer_list& a, real p) : Node(a), p(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + real p; +}; + +// y = dropout(x,p) where p specifies the dropout probability +struct DropoutDim : public Node { + explicit DropoutDim(const std::initializer_list& a, unsigned d,real p) : 
Node(a), dimension(d), p(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + unsigned dimension; + real p; +}; + +// y = dropout(x,p) where p specifies the dropout probability +struct DropoutBatch : public Node { + explicit DropoutBatch(const std::initializer_list& a, real p) : Node(a), p(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + real p; +}; + +// y = block_dropout(x,p) where p specifies the probability for dropping-out the entire block +struct BlockDropout : public Node { + explicit BlockDropout(const std::initializer_list& a, real p) : Node(a), dropout_probability(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + real dropout_probability; +}; + +// y = c + x_1 +// (c is a vector or matrix of the constant, usually 1, but can be configured) +struct ConstantPlusX : public Node { + explicit ConstantPlusX(const std::initializer_list& a, real o) : Node(a), c(o) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + real c; +}; + +// y = c - x_1 +// (c is a vector or matrix of the constant, usually 1, but can be configured) +struct ConstantMinusX : public Node { + explicit ConstantMinusX(const std::initializer_list& a, real o) : Node(a), c(o) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + real c; +}; + +// y = sqrt x_1 +struct Sqrt : public Node { + explicit Sqrt(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = abs x_1 +struct Abs : public Node { + explicit Abs(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = 
erf x_1 +struct Erf : public Node { + explicit Erf(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = tanh x_1 +struct Tanh : public Node { + explicit Tanh(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \odot x_1 +struct Square : public Node { + explicit Square(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \odot x_1 \odot x_1 +struct Cube : public Node { + explicit Cube(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = exp x_1 +struct Exp : public Node { + explicit Exp(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = lgamma x_1 +struct LogGamma : public Node { + explicit LogGamma(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = log x_1 (base e, i.e., natural log) +struct Log : public Node { + explicit Log(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// concatenate rows +struct Concatenate : public Node { + template explicit Concatenate(const T& a, unsigned d) : Node(a), dimension(d) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + // src_row_indices[i] says what row in fx the ith x vector was assigned to + // used to simplify backprop + mutable std::vector src_indices; + unsigned dimension; +}; + +// concatenate different batched experssions into 
one single batched tensor +struct ConcatenateToBatch : public Node { + template explicit ConcatenateToBatch(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override {return true;} + mutable std::vector src_element_indices; +}; + +// x_1 is a scalar (or row vector) +// x_2 is a scalar (or row vector) +// y = max(0, margin - x_1 + x_2) +struct PairwiseRankLoss : public Node { + explicit PairwiseRankLoss(const std::initializer_list& a, real m = 1.0) : Node(a), margin(m) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + real margin; +}; + +// Let x be a vector-valued input, x_i represents the score of the ith element, then +// y = \sum{i != element} max{0, margin - x_element + x_i} +struct Hinge : public Node { + explicit Hinge(const std::initializer_list& a, unsigned e, real m = 1.0) : Node(a), element(e), pelement(&element), margin(m) {} + explicit Hinge(const std::initializer_list& a, const unsigned* pe, real m = 1.0) : Node(a), element(), pelement(pe), margin(m) {} + explicit Hinge(const std::initializer_list& a, const std::vector& e, real m = 1.0) : Node(a), element(), pelement(), elements(e), pelements(&elements), margin(m) {} + explicit Hinge(const std::initializer_list& a, const std::vector* pe, real m = 1.0) : Node(a), element(), pelement(), elements(), pelements(pe), margin(m) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + unsigned element; + const unsigned* pelement; + std::vector elements; + const std::vector* pelements; + real margin; +}; + +// y = x_1, but dy/dx is set to 0 +struct NoBackprop : public Node { + explicit NoBackprop(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1, dy/dx is set to negative. 
+struct FlipGradient : public Node { + explicit FlipGradient(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 +struct Identity : public Node { + explicit Identity(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// hyperparameter: width > 1 +// x_1 is a vector in R^n, which we write x +// y is a vector in R^{n / width} +// y_i = max_{x_{i * width - width + 1}, ..., x_{i * width}} +struct MaxPooling1D : public Node { + MaxPooling1D(const std::initializer_list& a, unsigned w) : Node(a), width(w) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned width; + mutable std::vector ind; +}; + +// y = x_1 * x_2 +struct MatrixMultiply : public Node { + explicit MatrixMultiply(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \cdot x_2 (Hadamard product) +struct CwiseMultiply : public Node { + explicit CwiseMultiply(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 + x_2 (Addition where x_2 is a scalar) +struct ScalarAdd : public Node { + explicit ScalarAdd(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \cdot x_2 (Hadamard product where x_1 is a scalar) +struct ScalarMultiply : public Node { + explicit ScalarMultiply(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 / x_2 (Elementwise division where x_2 is a scalar) +struct ScalarQuotient : public Node { + explicit ScalarQuotient(const std::initializer_list& a) : Node(a) {} 
+ virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 / x_2 (cwiseQuotient) +struct CwiseQuotient : public Node { + explicit CwiseQuotient(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \sum_{i=2, 4 ...} A_i * x_{i+1} +struct AffineTransform : public Node { + template explicit AffineTransform(const T& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + mutable float* dEdf_mem; +}; + +// y = -x_1 +struct Negate : public Node { + explicit Negate(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = max(0,x) +struct Rectify : public Node { + explicit Rectify(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// you could do this with LogisticSigmoid, Softmax or a variety of other +// functions, but this is often useful. 
+// x_1 must be a vector with values between 0 and 1 +// target_y is an equivalently sized vector w values between 0 and 1 +// y = ty * log(x_1) + (1 - ty) * log(x_1) +struct BinaryLogLoss : public Node { + BinaryLogLoss(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = \log \sum_i \exp x_i +// done in log space carefully to avoid over/underflow issues +struct LogSumExp : public Node { + template explicit LogSumExp(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; +}; + +struct LogDet : public Node { + template explicit LogDet(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = \sum_i x_i +struct Sum : public Node { + template explicit Sum(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// y = \sum_i,j,... x[i,j,...] +struct SumElements : public Node { + template explicit SumElements(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// y = \sum_i x_i +struct SumBatches : public Node { + template explicit SumBatches(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// y = \sum_i,j,... x[i,j,...] 
+struct StdElements : public Node { + template explicit StdElements(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// y = \sum_i x_i +struct StdBatches : public Node { + template explicit StdBatches(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +//y = \sum_i x_i +struct StdDimension : public Node { + template explicit StdDimension(const T& a, unsigned d) : Node(a), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned dimension; +}; + +// y = \sum_i,j,... x[i,j,...] +struct MomentElements : public Node { + template explicit MomentElements(const T& a, unsigned o) : Node(a), order(o) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned order; +}; + +// y = \sum_i x_i +struct MomentBatches : public Node { + template explicit MomentBatches(const T& a, unsigned o) : Node(a), order(o) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned order; +}; + +//y = \sum_i x_i +struct MomentDimension : public Node { + template explicit MomentDimension(const T& a, unsigned d, unsigned o) : Node(a), dimension(d), order(o) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned dimension; + unsigned order; +}; + +// y = ( \sum_i x_i ) / |x| +struct Average : public Node { + template explicit Average(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// this is used to implement poisson regression +// x_1 = log predicted mean +// ty = true y (this is not a VariableIndex since it has to be a nonnegative integer and +// is therefore nondifferentiable. 
There are various continuous extensions +// using the incomplete gamma function that could be used, but meh) +// y = log Poisson(ty; \lambda = \exp x_1) +// = ty*x_1 - exp(x_1) - log(ty!) +struct PoissonRegressionLoss : public Node { + explicit PoissonRegressionLoss(const std::initializer_list& a, unsigned true_y) : Node(a), ty(true_y), pty(&ty) {} + explicit PoissonRegressionLoss(const std::initializer_list& a, const unsigned* ptrue_y) : Node(a), ty(), pty(ptrue_y) {} + DYNET_NODE_DEFINE_DEV_IMPL() + private: + unsigned ty; + const unsigned* pty; +}; + +// y = || x_1 ||^2 +struct SquaredNorm : public Node { + explicit SquaredNorm(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = || x_1 || +struct L2Norm : public Node { + explicit L2Norm(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = || x_1 - x_2 ||^2 +struct SquaredEuclideanDistance : public Node { + explicit SquaredEuclideanDistance(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = || x_1 - x_2 ||_H(d) +struct HuberDistance : public Node { + explicit HuberDistance(const std::initializer_list& a, float d = 1.345f) : Node(a), d(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + float d; +}; + +// y = || x_1 - x_2 ||_1 +struct L1Distance : public Node { + explicit L1Distance(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = \sigma(x_1) +struct LogisticSigmoid : public Node { + explicit LogisticSigmoid(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x / (1 + |x|) +struct SoftSign : public Node { + explicit SoftSign(const std::initializer_list& a) : Node(a) 
{} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// z = \sum_j \exp (x_i)_j +// y_i = (x_1)_i / z +struct Softmax : public Node { + explicit Softmax(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } +}; + +// z = \sum_j \exp (x_i)_j +// y_i = (x_1)_i - \log z +struct LogSoftmax : public Node { + explicit LogSoftmax(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } +}; + +// z = \sum_j \exp (x_i)_j +// y = (x_1)_element - \log z +struct PickNegLogSoftmax : public Node { + explicit PickNegLogSoftmax(const std::initializer_list& a, unsigned v) : Node(a), val(v), pval(&val), vals(), pvals() {} + // use this constructor if you want to perform mini-batching + explicit PickNegLogSoftmax(const std::initializer_list& a, const std::vector& v) : Node(a), val(), pval(), vals(v), pvals(&vals) {} + // use these constructors if you want to change the value after the graph is constructed + explicit PickNegLogSoftmax(const std::initializer_list& a, const unsigned* pv) : Node(a), val(), pval(pv), vals(), pvals() {} + explicit PickNegLogSoftmax(const std::initializer_list& a, const std::vector* pv) : Node(a), val(), pval(), vals(), pvals(pv) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + unsigned val; + const unsigned* pval; + std::vector vals; + const std::vector* pvals; +}; + +// z = \sum_{j \in denom} \exp (x_i)_j +// y_i = (x_1)_i - \log z +struct RestrictedLogSoftmax : public Node { + explicit RestrictedLogSoftmax(const std::initializer_list& a, const std::vector& d) : Node(a), denom(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + std::vector denom; +}; + 
+// x_1 is a vector +// y = (x_1)_{*pval} +// this is used to implement cross-entropy training +struct PickElement : public Node { + explicit PickElement(const std::initializer_list& a, unsigned v, unsigned d = 0) : Node(a), val(v), pval(&val), vals(), pvals(), dimension(d) {} + // use this constructor if you want to perform mini-batching + explicit PickElement(const std::initializer_list& a, const std::vector& v, unsigned d = 0) : Node(a), val(), pval(), vals(v), pvals(&vals), dimension(d) {} + // use these constructors if you want to change the value after the graph is constructed + explicit PickElement(const std::initializer_list& a, const unsigned* pv, unsigned d = 0) : Node(a), val(), pval(pv), vals(), pvals(), dimension(d) {} + explicit PickElement(const std::initializer_list& a, const std::vector* pv, unsigned d = 0) : Node(a), val(), pval(), vals(), pvals(pv), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + unsigned val; + const unsigned* pval; + std::vector vals; + const std::vector* pvals; + unsigned dimension; +}; + +// x_1 is a tensor +// y = x_1[start:end] along dimension d +// (start inclusive, end exclusive) +struct PickRange : public Node { + explicit PickRange(const std::initializer_list& a, unsigned s, unsigned e, unsigned d = 0) : Node(a), start(s), end(e), dim(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + unsigned start, end, dim; +}; + +// x is a batched tensor +// y = (x)_{[*pval]} +struct PickBatchElements : public Node { + explicit PickBatchElements(const std::initializer_list& a, unsigned v) : Node(a), val(v), pval(&val), vals(), pvals() {} + explicit PickBatchElements(const std::initializer_list& a, const std::vector& v) : Node(a), val(), pval(), vals(v), pvals(&vals) {} + explicit PickBatchElements(const std::initializer_list& a, const unsigned* pv) : Node(a), val(), pval(pv), vals(), pvals() {} + explicit 
PickBatchElements(const std::initializer_list& a, const std::vector* pv) : Node(a), val(), pval(), vals(), pvals(pv) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + unsigned val; + const unsigned* pval; + std::vector vals; + const std::vector* pvals; +}; + +// represents a simple vector of 0s +struct Zeroes : public Node { + explicit Zeroes(const Dim& d) : dim(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; +}; + +// draw random noise from Normal(0, 1) +struct RandomNormal : public Node { + explicit RandomNormal(const Dim& d) : dim(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; +}; + +// draw from Bernoulli(p) +struct RandomBernoulli : public Node { + explicit RandomBernoulli(const std::initializer_list& a, const Dim& d, real p, real scale = 1.0f) : dim(d), p(p), scale(scale) { + DYNET_ASSERT(a.size() == 0, "RandomBernoulli doesn't accept nodes as input"); + } + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + real p; + real scale; +}; + +// draw a random real from Uniform(left, right) +struct RandomUniform : public Node { + explicit RandomUniform(const std::initializer_list& a, const Dim& d, real left, real right) : dim(d), left(left), right(right) { + DYNET_ASSERT(a.size() == 0, "RandomUniform doesn't accept nodes as input"); + } + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + real left, right; +}; + +// draw a random real from Uniform(left, right) +struct RandomGumbel : public Node { + explicit RandomGumbel(const std::initializer_list& a, const Dim& d, real mu, real beta) : dim(d), mu(mu), beta(beta) { + DYNET_ASSERT(a.size() == 0, "RandomGumbel doesn't accept nodes as input"); + } + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + real mu, beta; +}; + +struct MaxDimension : public Node { + explicit MaxDimension(const std::initializer_list& a, unsigned dimension = 0) : Node(a), reduced_dim(dimension) { + first_dim = reduced_dim == 0 ? 1 : 0; + second_dim = first_dim + 1 == reduced_dim ? 
first_dim + 2 : first_dim + 1; + } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + unsigned reduced_dim; + unsigned first_dim; + unsigned second_dim; +}; + +struct MinDimension : public Node { + explicit MinDimension(const std::initializer_list& a, unsigned dimension = 0) : Node(a), reduced_dim(dimension) { + first_dim = reduced_dim == 0 ? 1 : 0; + second_dim = first_dim + 1 == reduced_dim ? first_dim + 2 : first_dim + 1; + } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + unsigned reduced_dim; + unsigned first_dim; + unsigned second_dim; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/op-helper.h b/thirdparty/dynet/dynet/op-helper.h new file mode 100644 index 000000000..ebf107b54 --- /dev/null +++ b/thirdparty/dynet/dynet/op-helper.h @@ -0,0 +1,64 @@ +#ifndef DYNET_CUDNN_TYPES_H_ +#define DYNET_CUDNN_TYPES_H_ + +#include "dynet/dynet.h" +#include "dynet/cuda.h" + +#if HAVE_CUDNN +template +struct DataTypeToCudnnType {}; + +#define MATCH_TYPE_TO_CUDNN_TYPE(TYPE, ENUM) \ + template <> \ + struct DataTypeToCudnnType { \ + static const cudnnDataType_t value = ENUM; \ + } + +MATCH_TYPE_TO_CUDNN_TYPE(float, CUDNN_DATA_FLOAT); +MATCH_TYPE_TO_CUDNN_TYPE(double, CUDNN_DATA_DOUBLE); + +#undef MATCH_TYPE_TO_CUDNN_TYPE +#endif + +namespace dynet { + +// A helper class to allocate memory from the aux_mem pointer for complex operators +// e.g. 
Conv2D +struct NodeMemPool { + public: + explicit NodeMemPool() : capacity_(0), used_(0), mem_(NULL) {} + explicit NodeMemPool(const int capacity, void* mem) + : capacity_(capacity), used_(0), mem_(mem) {} + + void* allocate(size_t nbytes) { + if (used_ + nbytes > capacity_) { + std::ostringstream oss; oss + << "aux_mem_pool allocate memory failed: exceed maximally allowed size"; + throw std::runtime_error(oss.str()); + } + void* res = static_cast(mem_) + used_; + used_ += nbytes; + return res; + } + + void free() { + used_ = 0; + } + + void* head() { + return mem_; + } + + size_t size() { + return capacity_; + } + + private: + size_t capacity_; + size_t used_; + void* mem_; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/param-nodes.cc b/thirdparty/dynet/dynet/param-nodes.cc new file mode 100644 index 000000000..9d3e15baf --- /dev/null +++ b/thirdparty/dynet/dynet/param-nodes.cc @@ -0,0 +1,275 @@ +#include "dynet/param-nodes.h" + +#include +#include +#include + +#include "dynet/nodes-macros.h" +#include "dynet/weight-decay.h" + +#ifdef HAVE_CUDA +#include "dynet/gpu-ops.h" +#endif + +using namespace std; + +namespace dynet { + +#ifndef __CUDACC__ + +string ConstParameterNode::as_string(const vector& arg_names) const { + ostringstream s; + s << "const_parameters(" << dim << ") @ " << params.get(); + return s.str(); +} + +Dim ConstParameterNode::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); + return dim; +} + +string ParameterNode::as_string(const vector& arg_names) const { + ostringstream s; + s << "parameters(" << dim << ") @ " << params.get(); + return s.str(); +} + +Dim ParameterNode::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); + return dim; +} + +void ParameterNode::accumulate_grad(const Tensor& g) { + if(params.mp != nullptr) + params.get()->accumulate_grad(g); + else if(lparams.mp != nullptr) + 
lparams.get()->accumulate_grad(g); + else + DYNET_RUNTIME_ERR("ParameterNode has neither Parameter nor LookupParameter"); // reached only when both params.mp and lparams.mp are null +} + +string InputNode::as_string(const vector& arg_names) const { + ostringstream s; + s << "constant(" << dim << ')'; + return s.str(); +} + +Dim InputNode::dim_forward(const vector& xs) const { + return dim; +} + +string SparseInputNode::as_string(const vector& arg_names) const { + ostringstream s; + s << "sparse_constant(" << dim << ')'; + return s.str(); +} + +Dim SparseInputNode::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(ids.size() == data.size(), + "Mismatch between size of ids (" << ids.size() << ") and size of data (" << data.size() << ") in SparseInput"); + return dim; +} + +size_t SparseInputNode::aux_storage_size() const { + return ids.size() * (sizeof(float) + sizeof(unsigned int)); +} + +string ScalarInputNode::as_string(const vector& arg_names) const { + ostringstream s; + s << "scalar_constant(" << pdata << ')'; + return s.str(); +} + +Dim ScalarInputNode::dim_forward(const vector& xs) const { + return Dim({1}); +} + +size_t LookupNode::aux_storage_size() const { + return dim.bd * sizeof(unsigned); +} + +string LookupNode::as_string(const vector& arg_names) const { + ostringstream s; + s << "lookup_parameters(|x|=" << params.get()->values.size() << " --> " << dim << ") @ " << params.get(); + return s.str(); +} + +Dim LookupNode::dim_forward(const vector& xs) const { + return dim; +} + +void LookupNode::accumulate_grad(const Tensor& g) { + if(pindex) { + params.get()->accumulate_grad(*pindex, g); + } else { + DYNET_ASSERT(pindices, "Have neither index nor index vector in LookupNode"); + params.get()->accumulate_grads(pindices->size(), &(*pindices)[0], (unsigned*)aux_mem, g.v); + } +} + +#endif + +template +void ConstParameterNode::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); + if(params.mp != nullptr) + 
fx.tvec().device(*dev.edevice) = params.get()->values.tvec() * params.mp->weight_decay.current_weight_decay(); + else if(lparams.mp != nullptr) + fx.tvec().device(*dev.edevice) = lparams.get()->all_values.tvec() * lparams.mp->weight_decay.current_weight_decay(); + else + DYNET_RUNTIME_ERR("ConstParameterNode has neither Parameter nor LookupParameter"); +} + +template +void ConstParameterNode::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("called backward() on arity 0 node: i = " << i); +} +DYNET_NODE_INST_DEV_IMPL(ConstParameterNode) + +template +void ParameterNode::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); +// TODO +// if (params->not_regularized) { +// fx.v = params->values.v; +// return; +// } + if(params.mp != nullptr) + fx.tvec().device(*dev.edevice) = params.get()->values.tvec() * params.mp->weight_decay.current_weight_decay(); + else if(lparams.mp != nullptr) + fx.tvec().device(*dev.edevice) = lparams.get()->all_values.tvec() * lparams.mp->weight_decay.current_weight_decay(); + else + DYNET_RUNTIME_ERR("ParameterNode has neither Parameter nor LookupParameter"); +} + +template +void ParameterNode::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("called backward() on arity 0 node: i = " << i); +} +DYNET_NODE_INST_DEV_IMPL(ParameterNode) + +template +void InputNode::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); +#if __CUDACC__ + cudaMemcpyAsync(fx.v, &pdata->front(), dim.size() * sizeof(float), cudaMemcpyHostToDevice); +#else + // TODO memcpy is only necessary if pdata->front() points to an unaligned location + // need to compute this 
value + bool is_input_address_aligned = false; + if (!is_input_address_aligned) { + memcpy(fx.v, &pdata->front(), dim.size() * sizeof(float)); + } else { + fx.v = const_cast(&pdata->front()); + } +#endif +} + +template +void InputNode::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("called backward() on arity 0 node: i = " << i); +} +DYNET_NODE_INST_DEV_IMPL(InputNode) + +template +void SparseInputNode::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); + fx.tvec().device(*dev.edevice) = fx.tvec().constant(defdata); +#if __CUDACC__ + unsigned int* ids_ptr = (unsigned int*)aux_mem; + float* data_ptr = (float*)(ids_ptr + ids.size()); + cudaMemcpyAsync(ids_ptr, &ids[0], ids.size() * sizeof(unsigned int), cudaMemcpyHostToDevice); + cudaMemcpyAsync(data_ptr, &data[0], data.size() * sizeof(float), cudaMemcpyHostToDevice); + dynet::gpu::dense_to_sparse_assign(ids.size(), ids_ptr, data_ptr, fx.v); +#else + for(size_t i = 0; i < ids.size(); ++i) + fx.v[ids[i]] = data[i]; +#endif +} + +template +void SparseInputNode::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("called backward() on arity 0 node: i = " << i); +} +DYNET_NODE_INST_DEV_IMPL(SparseInputNode) + +template +void ScalarInputNode::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); +#if __CUDACC__ + cudaMemcpyAsync(fx.v, pdata, 1 * sizeof(float), cudaMemcpyHostToDevice); +#else + fx.v[0] = *pdata; +#endif +} + +template +void ScalarInputNode::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + 
DYNET_RUNTIME_ERR("called backward() on arity 0 node: i = " << i); +} +DYNET_NODE_INST_DEV_IMPL(ScalarInputNode) + +template +void LookupNode::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in FUNCNAME"); + if(pindex) { + DYNET_ARG_CHECK(*pindex < params.get()->values.size(), + "Out-of-bounds attempt to access index " << *pindex << " for LookupParameter of size " << params.get()->values.size()); + DYNET_ASSERT(fx.d.batch_elems() == 1, "Batch dimension > 1 for lookup with single index"); + fx.tvec().device(*dev.edevice) = params.get()->values[*pindex].tvec() * params.mp->weight_decay.current_weight_decay(); + } else { + DYNET_ASSERT(pindices, "Have neither index nor index vector in LookupNode"); + DYNET_ARG_CHECK(fx.d.batch_elems() == pindices->size(), + "In LookupNode, in index vector size (" << pindices->size() << ") " + "doesn't match batch size in expressions (" << fx.d.batch_elems() << ")"); +#if __CUDACC__ + CUDA_CHECK(cudaMemcpyAsync((unsigned*)aux_mem, &(*pindices)[0], fx.d.bd * sizeof(unsigned), cudaMemcpyHostToDevice)); + dynet::gpu::sparse_to_dense_block_assign_and_multiply(fx.d.bd, (unsigned*)aux_mem, fx.d.batch_size(), params.mp->weight_decay.current_weight_decay(), params.get()->all_values.v, fx.v); +#else + for (unsigned b = 0; b < pindices->size(); ++b) { + unsigned i = pindices->at(b); + DYNET_ARG_CHECK(i < params.get()->values.size(), + "Out-of-bounds attempt to access index " << i << " for LookupParameter of size " << params.get()->values.size()); + fx.tb<2>().chip<2>(b).device(*dev.edevice) = params.get()->values[i].t<2>() * params.mp->weight_decay.current_weight_decay(); + } +#endif + } +} + +template +void LookupNode::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("called backward() on arity 0 node: i = " << i); +} 
+DYNET_NODE_INST_DEV_IMPL(LookupNode) + +} // namespace dynet diff --git a/thirdparty/dynet/dynet/param-nodes.h b/thirdparty/dynet/dynet/param-nodes.h new file mode 100644 index 000000000..b2774e327 --- /dev/null +++ b/thirdparty/dynet/dynet/param-nodes.h @@ -0,0 +1,92 @@ +#ifndef DYNET_PARAM_NODES_H_ +#define DYNET_PARAM_NODES_H_ + +#include "dynet/dynet.h" +#include "dynet/model.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +struct ParameterNodeBase : public Node { + virtual void accumulate_grad(const Tensor& g) = 0; +}; + +// represents optimizable parameters +struct ParameterNode : public ParameterNodeBase { + explicit ParameterNode(const Parameter & p) : dim(p.get()->dim), params(p) {} + explicit ParameterNode(const LookupParameter & lp) : dim(lp.get()->all_dim), lparams(lp) {} + DYNET_NODE_DEFINE_DEV_IMPL() + void accumulate_grad(const Tensor& g) override; + Dim dim; + Parameter params; + LookupParameter lparams; +}; + +// represents optimizable parameters that are being held constant +struct ConstParameterNode : public Node { + explicit ConstParameterNode(const Parameter & p) : dim(p.get()->dim), params(p) {} + explicit ConstParameterNode(const LookupParameter & lp) : dim(lp.get()->all_dim), lparams(lp) {} + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + Parameter params; + LookupParameter lparams; +}; + +// represents specified (not learned) inputs to the network +struct InputNode : public Node { + explicit InputNode(const Dim& d, const std::vector& dat) : dim(d), data(dat), pdata(&data) {} + explicit InputNode(const Dim& d, const std::vector* pdat) : dim(d), data(), pdata(pdat) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + Dim dim; + const std::vector data; + const std::vector* pdata; +}; + +// Represents specified (not learned) inputs to the network in sparse array format, +// with an optional default value. 
Note that indexes refer to where the memory is actually +// indexed in column-major format. When multiple batches are used they will also be +// consecutive in memory. This doesn't support pointer input, because this would require +// dynamic changing of the size of auxiliary memory on GPUs, although this could possibly +// be fixed in the future. +struct SparseInputNode : public Node { + explicit SparseInputNode(const Dim& d, const std::vector& id, const std::vector& dat, float defdat = 0.f) : dim(d), ids(id), data(dat), defdata(defdat) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + Dim dim; + const std::vector ids; + const std::vector data; + float defdata; +}; + +// represents specified (not learned) scalar inputs to the network +struct ScalarInputNode : public Node { + explicit ScalarInputNode(real s) : data(s), pdata(&data) {} + explicit ScalarInputNode(const real* ps) : data(), pdata(ps) {} + DYNET_NODE_DEFINE_DEV_IMPL() + const dynet::real data; + const dynet::real* pdata; +}; + +// represents a matrix/vector embedding of an item of a discrete set (1-hot coding) +struct LookupNode : public ParameterNodeBase { + LookupNode(LookupParameter p, unsigned ind) : dim(p.get()->dim), index(ind), pindex(&index), indices(), pindices(), params(p) {} + LookupNode(LookupParameter p, const unsigned* pind) : dim(p.get()->dim), index(), pindex(pind), indices(), pindices(), params(p) {} + LookupNode(LookupParameter p, const std::vector& indices) : dim(p.get()->dim), index(), pindex(), indices(indices), pindices(&this->indices), params(p) { dim.bd = pindices->size(); } + LookupNode(LookupParameter p, const std::vector* pindices) : dim(p.get()->dim), index(), pindex(), indices(), pindices(pindices), params(p) { dim.bd = pindices->size(); } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const 
override; + void accumulate_grad(const Tensor& g) override; + Dim dim; + unsigned index; + const unsigned* pindex; + std::vector indices; + const std::vector* pindices; + LookupParameter params; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/pretrain.cc b/thirdparty/dynet/dynet/pretrain.cc new file mode 100644 index 000000000..7d557bb46 --- /dev/null +++ b/thirdparty/dynet/dynet/pretrain.cc @@ -0,0 +1,63 @@ +#include "dynet/pretrain.h" + +#include +#include +#include +#include +#include +#include "dynet/dict.h" +#include "dynet/model.h" + +using namespace std; + +namespace dynet { + +void save_pretrained_embeddings(const std::string& fname, + const Dict& d, + const LookupParameter& lp) { + cerr << "Writing word vectors to " << fname << " ...\n"; + ofstream out(fname); + if(!out) + DYNET_INVALID_ARG("Could not save embeddings to " << fname); + auto& m = *lp.get(); + for (unsigned i = 0; i < d.size(); ++i) { + out << d.convert(i) << ' ' << (*m.values[i]).transpose() << endl; + } +} + +void read_pretrained_embeddings(const std::string& fname, + Dict& d, + std::unordered_map>& vectors) { + int unk = -1; + if (d.is_frozen()) unk = d.get_unk_id(); + cerr << "Loading word vectors from " << fname << " ...\n"; + ifstream in(fname); + if(!in) + DYNET_INVALID_ARG("Could not load embeddings from " << fname); + string line; + string word; + vector v; + getline(in, line); + istringstream lin(line); + lin >> word; + while(lin) { + float x; + lin >> x; + if (!lin) break; + v.push_back(x); + } + unsigned vec_size = v.size(); + int wid = d.convert(word); + if (wid != unk) vectors[wid] = v; + while(getline(in, line)) { + istringstream lin(line); + lin >> word; + int w = d.convert(word); + if (w != unk) { + for (unsigned i = 0; i < vec_size; ++i) lin >> v[i]; + vectors[w] = v; + } + } +} + +} // dynet diff --git a/thirdparty/dynet/dynet/pretrain.h b/thirdparty/dynet/dynet/pretrain.h new file mode 100644 index 000000000..4e939daf8 --- /dev/null +++ 
b/thirdparty/dynet/dynet/pretrain.h @@ -0,0 +1,22 @@ +#ifndef DYNET_PRETRAIN_H +#define DYNET_PRETRAIN_H + +#include +#include +#include +#include "dynet/dict.h" +#include "dynet/model.h" + +namespace dynet { + +void save_pretrained_embeddings(const std::string& fname, + const Dict& d, + const LookupParameter& lp); + +void read_pretrained_embeddings(const std::string& fname, + Dict& d, + std::unordered_map>& vectors); + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/rnn-state-machine.cc b/thirdparty/dynet/dynet/rnn-state-machine.cc new file mode 100644 index 000000000..d7a80c771 --- /dev/null +++ b/thirdparty/dynet/dynet/rnn-state-machine.cc @@ -0,0 +1,20 @@ +#include "dynet/rnn-state-machine.h" + +#include + +#include "dynet/dynet.h" + +using namespace std; + +namespace dynet { + +void RNNStateMachine::failure(RNNOp op) { + ostringstream oss; oss << "State transition error: currently in state " << q_ << " but received operation " << op; + throw std::invalid_argument(oss.str()); +} + +DYNET_SERIALIZE_COMMIT(RNNStateMachine, DYNET_SERIALIZE_DEFINE(q_)) +DYNET_SERIALIZE_IMPL(RNNStateMachine) + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/rnn-state-machine.h b/thirdparty/dynet/dynet/rnn-state-machine.h new file mode 100644 index 000000000..bc2b98b4a --- /dev/null +++ b/thirdparty/dynet/dynet/rnn-state-machine.h @@ -0,0 +1,47 @@ +#ifndef DYNET_RNN_STATE_MACHINE_H_ +#define DYNET_RNN_STATE_MACHINE_H_ + +#include "dynet/io-macros.h" + +namespace dynet { + +// CURRENT STATE | ACTION | NEXT STATE +// --------------+---------------------+----------------- +// CREATED | new_graph | GRAPH_READY +// GRAPH_READY | start_new_sequence | READING_INPUT +// READING_INPUT | add_input | READING_INPUT +// READING_INPUT | start_new_sequence | READING_INPUT +// READING_INPUT | new_graph | GRAPH_READY + +enum RNNState {CREATED, GRAPH_READY, READING_INPUT}; +enum RNNOp {new_graph, start_new_sequence, add_input}; + +class RNNStateMachine { + public: + 
RNNStateMachine() : q_(RNNState::CREATED) {} + void failure(RNNOp op); + void transition(RNNOp op) { + switch (q_) { + case RNNState::CREATED: + if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } + failure(op); + case RNNState::GRAPH_READY: + if (op == RNNOp::new_graph) { break; } + if (op == RNNOp::start_new_sequence) { q_ = RNNState::READING_INPUT; break; } + failure(op); + case RNNState::READING_INPUT: + if (op == RNNOp::add_input) { break; } + if (op == RNNOp::start_new_sequence) { break; } + if (op == RNNOp::new_graph) { q_ = RNNState::GRAPH_READY; break; } + failure(op); + } + } + private: + RNNState q_; + + DYNET_SERIALIZE_DECLARE() +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/rnn.cc b/thirdparty/dynet/dynet/rnn.cc new file mode 100644 index 000000000..c5b5e8c34 --- /dev/null +++ b/thirdparty/dynet/dynet/rnn.cc @@ -0,0 +1,193 @@ +#include "dynet/rnn.h" + +#include +#include +#include +#include + +#include "dynet/nodes.h" +#include "dynet/expr.h" + +using namespace std; +using namespace dynet::expr; +using namespace dynet; + +namespace dynet { + +enum { X2H=0, H2H, HB, L2H }; + +RNNBuilder::~RNNBuilder() {} + +void RNNBuilder::save_parameters_pretraining(const string& fname) const { + throw std::runtime_error("RNNBuilder::save_parameters_pretraining not overridden."); +} + +void RNNBuilder::load_parameters_pretraining(const string& fname) { + throw std::runtime_error("RNNBuilder::load_parameters_pretraining not overridden."); +} + +DYNET_SERIALIZE_COMMIT(RNNBuilder, DYNET_SERIALIZE_DEFINE(cur, head, sm)) +DYNET_SERIALIZE_IMPL(RNNBuilder) + +SimpleRNNBuilder::SimpleRNNBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model, + bool support_lags) : layers(layers), lagging(support_lags) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + Parameter p_x2h = model.add_parameters({hidden_dim, layer_input_dim}); + Parameter p_h2h = 
model.add_parameters({hidden_dim, hidden_dim}); + Parameter p_hb = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + vector ps = {p_x2h, p_h2h, p_hb}; + if (lagging) + ps.push_back(model.add_parameters({hidden_dim, hidden_dim})); + params.push_back(ps); + layer_input_dim = hidden_dim; + } + dropout_rate = 0.f; +} + +void SimpleRNNBuilder::new_graph_impl(ComputationGraph& cg) { + param_vars.clear(); + for (unsigned i = 0; i < layers; ++i) { + Parameter p_x2h = params[i][X2H]; + Parameter p_h2h = params[i][H2H]; + Parameter p_hb = params[i][HB]; + Expression i_x2h = parameter(cg,p_x2h); + Expression i_h2h = parameter(cg,p_h2h); + Expression i_hb = parameter(cg,p_hb); + vector vars = {i_x2h, i_h2h, i_hb}; + + if (lagging) { + Parameter p_l2h = params[i][L2H]; + Expression i_l2h = parameter(cg,p_l2h); + vars.push_back(i_l2h); + } + + param_vars.push_back(vars); + } +} + +void SimpleRNNBuilder::start_new_sequence_impl(const vector& h_0) { + h.clear(); + h0 = h_0; + DYNET_ARG_CHECK(h0.empty() || h0.size() == layers, + "Number of inputs passed to initialize RNNBuilder (" << h0.size() << ") is not equal to the number of layers (" << layers << ")"); +} + +Expression SimpleRNNBuilder::set_h_impl(int prev, const vector& h_new) { + DYNET_ARG_CHECK(h_new.size() == layers, + "Number of inputs passed to RNNBuilder::set_h() (" << h_new.size() << ") is not equal to the number of layers (" << layers << ")"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + for (unsigned i = 0; i < layers; ++i) { + Expression y = h_new[i]; // in bounds: the check above requires exactly one entry per layer (an empty h_new used to reach this read) + h[t][i] = y; + } + return h[t].back(); +} + +Expression SimpleRNNBuilder::add_input_impl(int prev, const Expression &in) { + if(dropout_rate != 0.f) + throw std::runtime_error("SimpleRNNBuilder doesn't support dropout yet"); + const unsigned t = h.size(); + h.push_back(vector(layers)); + + Expression x = in; + + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + + // y <--- g(y_prev) + 
if(prev >= 0) { + x = h[t][i] = tanh( affine_transform({vars[2], vars[0], x, vars[1], h[prev][i]}) ); + } else if(h0.size() > 0) { + x = h[t][i] = tanh( affine_transform({vars[2], vars[0], x, vars[1], h0[i]}) ); + } else { + x = h[t][i] = tanh( affine_transform({vars[2], vars[0], x}) ); + } + + } + return h[t].back(); +} + +Expression SimpleRNNBuilder::add_auxiliary_input(const Expression &in, const Expression &aux) { + const unsigned t = h.size(); + h.push_back(vector(layers)); + + Expression x = in; + + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + DYNET_ASSERT(vars.size() >= L2H + 1, "Failed dimension check in SimpleRNNBuilder"); + + if(t > 0) { + x = h[t][i] = tanh( affine_transform({vars[HB], vars[X2H], x, vars[L2H], aux, vars[H2H], h[t-1][i]}) ); + } else if(h0.size() > 0) { + x = h[t][i] = tanh( affine_transform({vars[HB], vars[X2H], x, vars[L2H], aux, vars[H2H], h0[i]}) ); + } else { + x = h[t][i] = tanh( affine_transform({vars[HB], vars[X2H], x, vars[L2H], aux}) ); + } + + } + return h[t].back(); +} + +void SimpleRNNBuilder::copy(const RNNBuilder & rnn) { + const SimpleRNNBuilder & rnn_simple = (const SimpleRNNBuilder&)rnn; + DYNET_ARG_CHECK(params.size() == rnn_simple.params.size(), + "Attempt to copy between two SimpleRNNBuilders that are not the same size"); + for(size_t i = 0; i < rnn_simple.params.size(); ++i) { + params[i][0] = rnn_simple.params[i][0]; + params[i][1] = rnn_simple.params[i][1]; + params[i][2] = rnn_simple.params[i][2]; + } +} + +void SimpleRNNBuilder::save_parameters_pretraining(const string& fname) const { + cerr << "Writing parameters to " << fname << endl; + ofstream of(fname); + if (!of) + DYNET_INVALID_ARG("Could not write parameters to " << fname << " in SimpleRNNBuilder"); + boost::archive::binary_oarchive oa(of); + std::string id = "SimpleRNNBuilder:params"; + oa << id; + oa << layers; + for (unsigned i = 0; i < layers; ++i) { + for (auto p : params[i]) { + oa << p.get()->values; + } + } +} + 
+void SimpleRNNBuilder::load_parameters_pretraining(const string& fname) { + cerr << "Loading parameters from " << fname << endl; + ifstream of(fname); + if (!of) + DYNET_INVALID_ARG("Could not load parameters from " << fname << " in SimpleRNNBuilder"); + boost::archive::binary_iarchive ia(of); + std::string id; + ia >> id; + if (id != "SimpleRNNBuilder:params") + throw std::invalid_argument("Bad id read in SimpleRNNBuilder::load_parameters_pretraining. Bad model format?"); + unsigned l = 0; + ia >> l; + if (l != layers) + throw std::invalid_argument("Bad number of layers in SimpleRNNBuilder::load_parameters_pretraining. Bad model format?"); + // TODO check other dimensions + for (unsigned i = 0; i < layers; ++i) { + for (auto p : params[i]) { + ia >> p.get()->values; + } + } +} + +DYNET_SERIALIZE_COMMIT(SimpleRNNBuilder, DYNET_SERIALIZE_DERIVED_DEFINE(RNNBuilder, params, layers, lagging)) +DYNET_SERIALIZE_IMPL(SimpleRNNBuilder) + +} // namespace dynet + +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::RNNBuilder) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::SimpleRNNBuilder) diff --git a/thirdparty/dynet/dynet/rnn.h b/thirdparty/dynet/dynet/rnn.h new file mode 100644 index 000000000..0498e7327 --- /dev/null +++ b/thirdparty/dynet/dynet/rnn.h @@ -0,0 +1,378 @@ +/** + * \file rnn.h + * \defgroup rnnbuilders rnnbuilders + * \brief Helper structures to build recurrent units + * + * \details TODO: Create documentation and explain rnns, etc... + */ + +#ifndef DYNET_RNN_H_ +#define DYNET_RNN_H_ + +#include + +#include "dynet/dynet.h" +#include "dynet/rnn-state-machine.h" +#include "dynet/expr.h" +#include "dynet/io-macros.h" + +using namespace dynet::expr; + +namespace dynet { + +class Model; + +BOOST_STRONG_TYPEDEF(int, RNNPointer) +inline void swap(RNNPointer& i1, RNNPointer& i2) { + RNNPointer t = i1; i1 = i2; i2 = t; +} + +/** + * \ingroup rnnbuilders + * \brief interface for constructing an RNN, LSTM, GRU, etc. 
+ * \details [long description] + */ +struct RNNBuilder { + /** + * + * \brief Default constructor + */ + RNNBuilder() : cur(-1) {} + virtual ~RNNBuilder(); + + /** + * + * \brief Get pointer to the current state + * + * \return Pointer to the current state + */ + RNNPointer state() const { return cur; } + + /** + * + * \brief Initialize with new computation graph + * \details call this to reset the builder when you are working with a newly + * created ComputationGraph object + * + * \param cg Computation graph + */ + void new_graph(ComputationGraph& cg) { + sm.transition(RNNOp::new_graph); + new_graph_impl(cg); + } + + /** + * + * \brief Reset for new sequence + * \details call this before add_input and after new_graph, + * when starting a new sequence on the same hypergraph. + * + * \param h_0 `h_0` is used to initialize hidden layers at timestep 0 to given values + */ + void start_new_sequence(const std::vector& h_0 = {}) { + sm.transition(RNNOp::start_new_sequence); + cur = RNNPointer(-1); + head.clear(); + start_new_sequence_impl(h_0); + } + + // + /** + * + * \brief Explicitly set the output state of a node + * + * \param prev Pointer to the previous state + * \param h_new The new hidden state + * + * \return The hidden representation of the deepest layer + */ + Expression set_h(const RNNPointer& prev, const std::vector& h_new = {}) { + sm.transition(RNNOp::add_input); + head.push_back(prev); + cur = head.size() - 1; + return set_h_impl(prev, h_new); + } + + // + /** + * + * \brief Set the internal state of a node (for lstms/grus) + * \details For RNNs without internal states (SimpleRNN, GRU...), + * this has the same behaviour as `set_h` + * + * \param prev Pointer to the previous state + * \param s_new The new state. 
Can be `{new_c[0],...,new_c[n]}` + * or `{new_c[0],...,new_c[n], new_h[0],...,new_h[n]}` + * + * \return The hidden representation of the deepest layer + */ + Expression set_s(const RNNPointer& prev, const std::vector& s_new = {}) { + sm.transition(RNNOp::add_input); + head.push_back(prev); + cur = head.size() - 1; + return set_s_impl(prev, s_new); + } + + /** + * + * \brief Add another timestep by reading in the variable x + * + * \param x Input variable + * + * \return The hidden representation of the deepest layer + */ + Expression add_input(const Expression& x) { + sm.transition(RNNOp::add_input); + head.push_back(cur); + int rcp = cur; + cur = head.size() - 1; + return add_input_impl(rcp, x); + } + + /** + * + * \brief Add another timestep, with arbitrary recurrent connection. + * \details This allows you to define a recurrent connection to `prev` + * rather than to `head[cur]`. + * This can be used to construct trees, implement beam search, etc. + * + * \param prev Pointer to the previous state + * \param x Input variable + * + * \return The hidden representation of the deepest layer + */ + Expression add_input(const RNNPointer& prev, const Expression& x) { + sm.transition(RNNOp::add_input); + head.push_back(prev); + cur = head.size() - 1; + return add_input_impl(prev, x); + } + + /** + * + * \brief Rewind the last timestep + * \details - this DOES NOT remove the variables from the computation graph, + * it just means the next time step will see a different previous state. + * You can rewind as many times as you want. + */ + void rewind_one_step() { + cur = head[cur]; + } + + /** + * + * \brief Return the RNN state that is the parent of `p` + * \details - This can be used in implementing complex structures + * such as trees, etc. 
+ */ + RNNPointer get_head(const RNNPointer& p) { + return head[p]; + } + + /** + * + * \brief Set Dropout + * + * \param d Dropout rate + */ + void set_dropout(float d) { dropout_rate = d; } + /** + * + * \brief Disable Dropout + * \details In general, you should disable dropout at test time + */ + void disable_dropout() { dropout_rate = 0; } + + /** + * + * \brief Returns node (index) of most recent output + * + * \return Node (index) of most recent output + */ + virtual Expression back() const = 0; + /** + * + * \brief Access the final output of each hidden layer + * + * \return Final output of each hidden layer + */ + virtual std::vector final_h() const = 0; + /** + * + * \brief Access the output of any hidden layer + * + * \param i Pointer to the step which output you want to access + * + * \return Output of each hidden layer at the given step + */ + virtual std::vector get_h(RNNPointer i) const = 0; + + /** + * + * \brief Access the final state of each hidden layer + * \details This returns the state of each hidden layer, + * in a format that can be used in start_new_sequence + * (i.e. including any internal cell for LSTMs and the likes) + * + * \return vector containing, if it exists, the list of final + * internal states, followed by the list of final outputs for + * each layer + */ + virtual std::vector final_s() const = 0; + /** + * + * \brief Access the state of any hidden layer + * \details See `final_s` for details + * + * \param i Pointer to the step which state you want to access + * + * \return Internal state of each hidden layer at the given step + */ + virtual std::vector get_s(RNNPointer i) const = 0; + + /** + * + * \brief Number of components in `h_0` + * + * \return Number of components in `h_0` + */ + virtual unsigned num_h0_components() const = 0; + /** + * + * \brief Copy the parameters of another builder. + * + * \param params RNNBuilder you want to copy parameters from. 
+ */ + virtual void copy(const RNNBuilder & params) = 0; + + /** + * + * \brief This function saves all the parameters associated with + * a particular RNNBuilder's derived class to a file. + * \details This should not be used to serialize models, it should + * only be used to save parameters for pretraining. + * If you are interested in serializing models, use the boost + * serialization API against your model class. + * + * \param fname File you want to save your model to. + */ + virtual void save_parameters_pretraining(const std::string& fname) const; + /** + * + * \brief Loads all the parameters associated with a particular RNNBuilder's + * derived class from a file. + * \details This should not be used to serialize models, it should + * only be used to load parameters from pretraining. + * If you are interested in serializing models, use the boost + * serialization API against your model class. + * + * \param fname File you want to read your model from. + */ + virtual void load_parameters_pretraining(const std::string& fname); + + +protected: + virtual void new_graph_impl(ComputationGraph& cg) = 0; + virtual void start_new_sequence_impl(const std::vector& h_0) = 0; + virtual Expression add_input_impl(int prev, const Expression& x) = 0; + virtual Expression set_h_impl(int prev, const std::vector& h_new) = 0; + virtual Expression set_s_impl(int prev, const std::vector& c_new) = 0; + RNNPointer cur; + float dropout_rate; +private: + // the state machine ensures that the caller is behaving + RNNStateMachine sm; + std::vector head; // head[i] is the previous (parent) state that state i was added on top of + + DYNET_SERIALIZE_DECLARE() +}; + +/** + * \ingroup rnnbuilders + * \brief This provides a builder for the simplest RNN with tanh nonlinearity + * \details The equation for this RNN is : + * \f$h_t=\tanh(W_x x_t + W_h h_{t-1} + b)\f$ + * + */ +struct SimpleRNNBuilder : public RNNBuilder { + SimpleRNNBuilder() = default; + /** + * + * \brief Builds a simple RNN + * + * \param layers Number of layers + 
* \param input_dim Dimension of the input + * \param hidden_dim Hidden layer (and output) size + * \param model Model holding the parameters + * \param support_lags Allow for auxiliary output? + */ + explicit SimpleRNNBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model, + bool support_lags = false); + +protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h_0) override; + Expression add_input_impl(int prev, const Expression& x) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + Expression set_s_impl(int prev, const std::vector& s_new) override {return set_h_impl(prev, s_new);} + +public: + /** + * + * \brief Add auxiliary output + * \details Returns \f$h_t=\tanh(W_x x_t + W_h h_{t-1} + W_y y + b)\f$ + * where \f$y\f$ is an auxiliary output + * TODO : clarify + * + * \param x Input expression + * \param aux Auxiliary output expression + * + * \return The hidden representation of the deepest layer + */ + Expression add_auxiliary_input(const Expression& x, const Expression &aux); + + Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); } + std::vector final_h() const override { return (h.size() == 0 ? h0 : h.back()); } + std::vector final_s() const override { return final_h(); } + + std::vector get_h(RNNPointer i) const override { return (i == -1 ? 
h0 : h[i]); } + std::vector get_s(RNNPointer i) const override { return get_h(i); } + void copy(const RNNBuilder & params) override; + + unsigned num_h0_components() const override { return layers; } + + void save_parameters_pretraining(const std::string& fname) const override; + void load_parameters_pretraining(const std::string& fname) override; + + // first index is layer, then x2h h2h hb + std::vector> params; + + // first index is layer, then x2h h2h hb + std::vector> param_vars; + +private: + + // first index is time, second is layer + std::vector> h; + + // initial value of h + // defaults to zero matrix input + std::vector h0; + + unsigned layers; + bool lagging; + + DYNET_SERIALIZE_DECLARE() +}; + +} // namespace dynet + +DYNET_NINTRUSIVE_SERIALIZE_DEFINE(dynet::RNNPointer & p, p.t) + +BOOST_CLASS_EXPORT_KEY(dynet::RNNBuilder) +BOOST_CLASS_EXPORT_KEY(dynet::SimpleRNNBuilder) + +#endif diff --git a/thirdparty/dynet/dynet/saxe-init.cc b/thirdparty/dynet/dynet/saxe-init.cc new file mode 100644 index 000000000..10a04f233 --- /dev/null +++ b/thirdparty/dynet/dynet/saxe-init.cc @@ -0,0 +1,27 @@ +#include "dynet/saxe-init.h" +#include "dynet/tensor.h" +#include "dynet/globals.h" + +#include +#include + +#include + +using namespace std; + +namespace dynet { + +void orthonormal_random(unsigned dd, float g, Tensor& x) { // fills x with the U factor of the SVD of a random dd x dd matrix; NOTE(review): g is never read in this body + Tensor t; + t.d = Dim({dd, dd}); + t.v = new float[dd * dd]; // manually owned scratch buffer; only released by the delete[] at the end, so an exception in between would leak it + normal_distribution distribution(0, 0.01); + auto b = [&] () {return distribution(*rndeng);}; // draws one sample from the shared engine rndeng + generate(t.v, t.v + dd*dd, b); + Eigen::JacobiSVD svd(*t, Eigen::ComputeFullU); // only the full U matrix is requested + *x = svd.matrixU(); // assumes x is already a dd x dd tensor with storage -- TODO confirm against callers + delete[] t.v; +} + +} + diff --git a/thirdparty/dynet/dynet/saxe-init.h b/thirdparty/dynet/dynet/saxe-init.h new file mode 100644 index 000000000..47beea271 --- /dev/null +++ b/thirdparty/dynet/dynet/saxe-init.h @@ -0,0 +1,12 @@ +#ifndef DYNET_SAXE_INIT_H_ +#define DYNET_SAXE_INIT_H_ + +namespace dynet { + +struct Tensor; + +void orthonormal_random(unsigned dim, float g, Tensor& x); + +} + 
+#endif diff --git a/thirdparty/dynet/dynet/shadow-params.cc b/thirdparty/dynet/dynet/shadow-params.cc new file mode 100644 index 000000000..a06372e6f --- /dev/null +++ b/thirdparty/dynet/dynet/shadow-params.cc @@ -0,0 +1,60 @@ +#include "dynet/dynet.h" + +#include "dynet/shadow-params.h" +#include "dynet/tensor.h" +#include "dynet/aligned-mem-pool.h" +#include "dynet/model.h" + +#define LOAD_INIT_FUNC() initialize_lookups() + +using namespace std; + +namespace dynet { + +ShadowParameters::ShadowParameters(const ParameterStorage& p) : h(p.values) { + default_device->allocate_tensor(DeviceMempool::PS, h); + TensorTools::zero(h); +} + +ShadowLookupParameters::ShadowLookupParameters(const LookupParameterStorage& lp) : all_h(lp.all_values) { + default_device->allocate_tensor(DeviceMempool::PS, all_h); + TensorTools::zero(all_h); + initialize_lookups(); +} + +void ShadowLookupParameters::initialize_lookups() { + int num = all_h.d[all_h.d.nd-1]; + Dim dim = all_h.d; dim.nd--; + int dim_size = dim.size(); + if(h.size() == 0) { + h.resize(num); + for(int i = 0; i < num; ++i) + h[i] = Tensor(dim, all_h.v + i*dim_size, all_h.device, all_h.mem_pool); + } +} + +vector allocate_shadow_parameters(const Model& m) { + vector v; + v.reserve(m.parameters_list().size()); + for (auto& p : m.parameters_list()) + v.emplace_back(*p); + return v; +} + +vector allocate_shadow_lookup_parameters(const Model& m) { + vector v; + v.reserve(m.lookup_parameters_list().size()); + for (auto& p : m.lookup_parameters_list()) + v.emplace_back(*p); + return v; +} + +DYNET_SERIALIZE_COMMIT(ShadowParameters, DYNET_SERIALIZE_DEFINE(h)) +DYNET_SERIALIZE_IMPL(ShadowParameters) + +DYNET_SERIALIZE_SAVE_COMMIT(ShadowLookupParameters, DYNET_SERIALIZE_DEFINE(h)) +DYNET_SERIALIZE_LOAD_COMMIT(ShadowLookupParameters, LOAD_INIT_FUNC(), DYNET_SERIALIZE_DEFINE(h)) +DYNET_SAVELOAD_IMPL(ShadowLookupParameters) + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/shadow-params.h 
b/thirdparty/dynet/dynet/shadow-params.h new file mode 100644 index 000000000..ae1ce5c03 --- /dev/null +++ b/thirdparty/dynet/dynet/shadow-params.h @@ -0,0 +1,43 @@ +#ifndef DYNET_SHADOW_PARAMS_H +#define DYNET_SHADOW_PARAMS_H + +#include +#include "dynet/tensor.h" +#include "dynet/io-macros.h" + +// if your learner needs to keep track of an extra set of values (one per +// parameter), use the Shadow classes. this can be used to implement, e.g., +// momentum or adagrad + +namespace dynet { + +class Model; +struct ParameterStorage; +struct LookupParameterStorage; + +struct ShadowParameters { + ShadowParameters() {} + explicit ShadowParameters(const ParameterStorage& p); + Tensor h; + private: + DYNET_SERIALIZE_DECLARE() +}; + +struct ShadowLookupParameters { + ShadowLookupParameters() {} + explicit ShadowLookupParameters(const LookupParameterStorage& lp); + Tensor all_h; + std::vector h; + private: + void initialize_lookups(); + DYNET_SERIALIZE_SPLIT_DECLARE() +}; + +// one per element in model.parameters_list +std::vector allocate_shadow_parameters(const Model& model); +// one per element in model.lookup_parameters_list +std::vector allocate_shadow_lookup_parameters(const Model& model); + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/simd-functors.h b/thirdparty/dynet/dynet/simd-functors.h new file mode 100644 index 000000000..2f48ebdeb --- /dev/null +++ b/thirdparty/dynet/dynet/simd-functors.h @@ -0,0 +1,227 @@ +#ifndef DYNET_XFUNCTORS_H +#define DYNET_XFUNCTORS_H + +#ifndef __CUDACC__ +#include +#endif + +#include "dynet/functors.h" + +// these functors are implemented to exploit Eigen's internal logic for doing +// vectorized arithmetic. I'm putting them in a separate file since, if Eigen +// breaks backward compatibility by changing an internal interface, I want +// the necessary changes to be localized. 
+// +// to implement your own functor, you need to provide +// 1) operator() implemented on the scalar data type +// 2) packetOp implemented using vector ("packet") type +// 3) the functor_traits specialization for your functor +// that tells the compiler whether your architecture +// has vectorized support for the operations you need +// and an estimate of the cost of the operation + +namespace dynet { +template struct const_add_op { + const_add_op(const Scalar& c) : c(c) {} + DYNET_DEVICE_FUNC inline const Scalar operator() (const Scalar& x) const { + return c + x; + } + template + DYNET_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + using namespace Eigen::internal; + return padd(pset1(c), x); + } + Scalar c; +}; +} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost * 2, + PacketAccess = packet_traits::HasAdd + }; +}; +} } + +namespace dynet { +template struct const_minus_op { + const_minus_op(const Scalar& c) : c(c) {} + DYNET_DEVICE_FUNC inline const Scalar operator() (const Scalar& x) const { + return c - x; + } + template + DYNET_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + using namespace Eigen::internal; + return psub(pset1(c), x); + } + Scalar c; +}; +} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost * 2, + PacketAccess = packet_traits::HasSub + }; +}; +} } + +namespace dynet { +template struct scalar_logistic_sigmoid_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_sigmoid_op) + DYNET_DEVICE_FUNC inline const Scalar operator() (const Scalar& x) const { + using std::exp; + const Scalar one = Scalar(1); + return one / (one + exp(-x)); + } + template + DYNET_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { + using namespace Eigen::internal; + const Packet one = pset1(1); + return pdiv(one, padd(one, pexp(pnegate(x)))); + } +}; +} + +namespace Eigen { namespace internal { +template 
+struct functor_traits > { + enum { + Cost = NumTraits::AddCost * 2 + NumTraits::MulCost * 6, + PacketAccess = packet_traits::HasAdd && packet_traits::HasDiv && + packet_traits::HasNegate && packet_traits::HasExp + }; +}; +} } + +namespace dynet { +template struct scalar_erf_backward_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_backward_op) + DYNET_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& d) const { // returns d * 2/sqrt(pi) * exp(-x^2), i.e. d times the derivative of erf at x + using std::exp; + const Scalar two_over_sqrt_pi(1.1283791670955125738961589); // 2/sqrt(pi); the previous name sqrt_pi_over2 described a different value + return two_over_sqrt_pi * exp(-x * x) * d; + } + template + DYNET_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& d) const { // vectorized form of operator() above + using namespace Eigen::internal; + const Packet two_over_sqrt_pi = pset1(1.1283791670955125738961589); // same 2/sqrt(pi) constant, broadcast to every lane + return pmul(two_over_sqrt_pi, pmul(pexp(pnegate(pmul(x, x))), d)); + } +}; +} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = NumTraits::MulCost * 8, + PacketAccess = packet_traits::HasExp && packet_traits::HasMul && packet_traits::HasNegate + }; +}; +} } + +namespace dynet { +template struct scalar_logistic_sigmoid_backward_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_sigmoid_backward_op) + DYNET_DEVICE_FUNC inline const Scalar operator() (const Scalar& t, const Scalar& d) const { // returns d * t * (1 - t) + const Scalar one = Scalar(1); + return (one - t) * t * d; + } + template + DYNET_DEVICE_FUNC inline Packet packetOp(const Packet& t, const Packet& d) const { // vectorized form of operator() above + using namespace Eigen::internal; + const Packet one = pset1(1); + return pmul(psub(one, t), pmul(t, d)); + } +}; +} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost + NumTraits::MulCost * 2, + PacketAccess = packet_traits::HasSub && packet_traits::HasMul + }; +}; +} } + +namespace dynet { +template struct scalar_tanh_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) + DYNET_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tanh; return tanh(a); } 
+ template + DYNET_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return Eigen::internal::ptanh(a); } +}; +} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = 5 * NumTraits::MulCost, + PacketAccess = packet_traits::HasTanh + }; +}; +} } + +namespace dynet { +template struct scalar_tanh_backward_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_backward_op) + DYNET_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& t, const Scalar& d) const { return (1 - t * t) * d; } + template + DYNET_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& t, const Packet& d) const { + using namespace Eigen::internal; + const Packet one = pset1(1); + return pmul(psub(one, pmul(t, t)), d); + } +}; +} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost + 2 * NumTraits::MulCost, + PacketAccess = packet_traits::HasSub && packet_traits::HasMul + }; +}; +}} + +namespace dynet { +//this is slower than the dumb implementation, probably because of the pset operations +// which could be factored out into the constructor, but the Packet type isn't used +// then (and I think fixing this would be hard) +template struct scalar_nlsoftmax_backward_op { + scalar_nlsoftmax_backward_op(const Scalar& lz, const Scalar& err) : logz(lz), d(err) {} + DYNET_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& t) const { + using std::exp; + return exp(t - logz) * d; + } + template + DYNET_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& t) const { + using namespace Eigen::internal; + const Packet lz = pset1(logz); + const Packet dd = pset1(d); + return pmul(pexp(psub(t, lz)), dd); + } + Scalar logz; + Scalar d; +};} + +namespace Eigen { namespace internal { +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost + 6 * NumTraits::MulCost, + PacketAccess = packet_traits::HasSub && packet_traits::HasExp + }; 
+}; +}} + +#endif diff --git a/thirdparty/dynet/dynet/tensor.cc b/thirdparty/dynet/dynet/tensor.cc new file mode 100644 index 000000000..04bf0500c --- /dev/null +++ b/thirdparty/dynet/dynet/tensor.cc @@ -0,0 +1,438 @@ +#include "dynet/tensor.h" +#include "dynet/globals.h" + +#include +#include +#include + +#include +#include + +#ifdef __CUDACC__ +#include "dynet/gpu-ops.h" +#include "dynet/cuda.h" +#endif + +using namespace std; + +namespace dynet { + +// ---- CPU only operations + +#ifndef __CUDACC__ + +ostream& operator<<(ostream& os, const Tensor& t) { + if (t.device->type == DeviceType::CPU) { + os << (*t); + } else { +#if HAVE_CUDA + if (t.device->type == DeviceType::GPU) { + vector vt = as_vector(t); + Eigen::Map m(&vt[0], t.d.rows(), t.d.cols()); + os << m; + } +#endif + } + return os; +} + +real as_scalar(const Tensor& t) { + if (t.d.size() != 1) + throw std::runtime_error("Input tensor has more than one element, cannot convert to scalar."); + real res = 0.; + if (t.device->type == DeviceType::CPU) { + return t.v[0]; + } else { +#if HAVE_CUDA + if (t.device->type == DeviceType::GPU) { + CUDA_CHECK(cudaMemcpy(&res, t.v, sizeof(float), cudaMemcpyDeviceToHost)); + return res; + } +#endif + } + return res; +} + +vector as_vector(const Tensor& v) { + vector res(v.d.size()); + if (v.device->type == DeviceType::CPU) { + memcpy(&res[0], v.v, sizeof(real) * res.size()); + } else { +#if HAVE_CUDA + if (v.device->type == DeviceType::GPU) { + CUDA_CHECK(cudaMemcpy(&res[0], v.v, sizeof(real) * res.size(), cudaMemcpyDeviceToHost)); + } +#endif + } + return res; +} + +vector as_vector(const IndexTensor& v) { + vector res(v.d.size()); + if (v.device->type == DeviceType::CPU) { + memcpy(&res[0], v.v, sizeof(Eigen::DenseIndex) * res.size()); + } else { +#if HAVE_CUDA + if (v.device->type == DeviceType::GPU) { + CUDA_CHECK(cudaMemcpy(&res[0], v.v, sizeof(Eigen::DenseIndex) * res.size(), cudaMemcpyDeviceToHost)); + } +#endif + } + return res; +} + +float 
TensorTools::access_element(const Tensor& v, int index) { + float ret = 0.; + if (v.device->type == DeviceType::CPU) { + return v.v[index]; + } else { +#if HAVE_CUDA + if (v.device->type == DeviceType::GPU) { + cudaMemcpy(&ret, &v.v[index], sizeof(real), cudaMemcpyDeviceToHost); + return ret; + } +#endif + } + return ret; +} + +float TensorTools::access_element(const Tensor& v, const Dim& index) { +#if HAVE_CUDA + throw std::runtime_error("TensorTools::access_element(Tensor,Dim) not implemented for CUDA"); +#else + return (*v)(index[0], index[1]); +#endif +} + +void TensorTools::set_element(const Tensor& v, int index, float value) { + if (v.device->type == DeviceType::CPU) { + v.v[index] = value; + } else { +#if HAVE_CUDA + if (v.device->type == DeviceType::GPU) { + cudaMemcpyAsync(&v.v[index], &value, sizeof(real), cudaMemcpyHostToDevice); + } +#endif + } +} + +void TensorTools::copy_element(const Tensor& l, int lindex, Tensor& r, int rindex) { + if (l.device->type == DeviceType::CPU) { + r.v[rindex] = l.v[lindex]; + } else { +#if HAVE_CUDA + if (l.device != r.device) + throw std::invalid_argument("TensorTools::CopyElement doesn't support inter-device copy yet"); + if (l.device->type == DeviceType::GPU) { + cudaMemcpyAsync(&r.v[rindex], &l.v[lindex], sizeof(real), cudaMemcpyDeviceToDevice); + } +#endif + } +} + +void TensorTools::set_elements(const Tensor& v, const vector& vec) { + if (v.device->type == DeviceType::CPU) { + memcpy(v.v, &vec[0], sizeof(real) * vec.size()); + } else { +#if HAVE_CUDA + if (v.device->type == DeviceType::GPU) { + cudaMemcpyAsync(v.v, &vec[0], sizeof(real) * vec.size(), cudaMemcpyHostToDevice); + } +#endif + } +} + +void TensorTools::copy_elements(const Tensor& v, const Tensor& v_src) { + if (v.device->type == DeviceType::CPU) { + memcpy(v.v, v_src.v, sizeof(real) * v.d.size()); + } else { +#if HAVE_CUDA + if (v.device != v_src.device) + throw std::invalid_argument("TensorTools::CopyElement doesn't support inter-device copy yet"); + if 
(v.device->type == DeviceType::GPU) { + cudaMemcpyAsync(v.v, v_src.v, sizeof(real) * v.d.size(), cudaMemcpyDeviceToDevice); + } +#endif + } +} + +void TensorTools::zero(Tensor& d) { + constant(d, 0); +} + +void TensorTools::identity(Tensor& val) { + if (val.d.nd != 2 || val.d[0] != val.d[1]) + throw std::runtime_error("Attempt to set a tensor that is not a square matrix to identity"); + size_t pos = 0; + if (val.device->type == DeviceType::CPU) { + for (size_t i = 0; i < val.d[0]; ++i) + for (size_t j = 0; j < val.d[1]; ++j) + val.v[pos++] = (i == j ? 1 : 0); + } else { +#if HAVE_CUDA + if (val.device->type == DeviceType::GPU) { + float* t = new float[val.d.size()]; + for (size_t i = 0; i < val.d[0]; ++i) + for (size_t j = 0; j < val.d[1]; ++j) + t[pos++] = (i == j ? 1 : 0); + CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); + delete[] t; + } +#endif + } +} + +void TensorTools::randomize_bernoulli(Tensor& val, real p, real scale) { + bernoulli_distribution distribution(p); + auto b = [&] {return distribution(*rndeng) * scale;}; + if (val.device->type == DeviceType::CPU) { + generate(val.v, val.v + val.d.size(), b); + } else { +#if HAVE_CUDA + if (val.device->type == DeviceType::GPU) { + float* t = new float[val.d.size()]; + generate(t, t + val.d.size(), b); + CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); + delete[] t; + } +#endif + } +} + +void TensorTools::randomize_normal(Tensor& val, real mean, real stddev) { + normal_distribution distribution(mean, stddev); + auto b = [&] {return distribution(*rndeng);}; + if (val.device->type == DeviceType::CPU) { + generate(val.v, val.v + val.d.size(), b); + } else { +#if HAVE_CUDA + if (val.device->type == DeviceType::GPU) { + float* t = new float[val.d.size()]; + generate(t, t + val.d.size(), b); + CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); + delete[] t; + } +#endif + } +} + +void 
TensorTools::randomize_uniform(Tensor& val, real left, real right) { + uniform_real_distribution distribution(left, right); + auto b = [&] {return distribution(*rndeng);}; + if (val.device->type == DeviceType::CPU) { + generate(val.v, val.v + val.d.size(), b); + } else { +#if HAVE_CUDA + if (val.device->type == DeviceType::GPU) { + float* t = new float[val.d.size()]; + generate(t, t + val.d.size(), b); + CUDA_CHECK(cudaMemcpy(val.v, t, sizeof(real) * val.d.size(), cudaMemcpyHostToDevice)); + delete[] t; + } +#endif + } +} + +void TensorTools::randomize_orthonormal(Tensor& val, real scale) { + if (val.d.nd != 2 || val.d[0] != val.d[1]) + throw std::runtime_error("Attempt to set a tensor that is not a square matrix to an orthogonal matrix"); +#ifdef HAVE_CUDA + throw std::runtime_error("Orthonormal initialization not implemented in CUDA (we welcome pull requests)"); +#else + randomize_uniform(val, -1.0, 1.0); + Eigen::JacobiSVD svd(*val, Eigen::ComputeFullU | Eigen::ComputeThinV); + *val = scale * svd.matrixU(); +#endif +} + +template +void Tensor::save(Archive& ar, const unsigned int ver) const { + ar & d; + int dev_id = ((device == default_device) ? (int) - 1 : device->device_id); + ar & dev_id; + ar & mem_pool; + if (device->type == DeviceType::CPU) { + ar & boost::serialization::make_array(v, d.size()); + } else { +#ifdef HAVE_CUDA + if (device->type == DeviceType::GPU) { + float* vc = static_cast(std::malloc(d.size() * sizeof(float))); + CUDA_CHECK(cudaMemcpy(vc, v, d.size() * sizeof(float), cudaMemcpyDeviceToHost)); + ar & boost::serialization::make_array(vc, d.size()); + free(vc); + } +#endif + } +} + +template +void Tensor::load(Archive& ar, const unsigned int ver) { + ar & d; + int dev_id = -1; + // This default value is for backward compatibility with models that were + // saved without information about what mempool a tensor belongs to. 
+ mem_pool = DeviceMempool::PS; + if (ver > 0) { + ar & dev_id; + ar & mem_pool; + } + if (dev_id == -1) { + device = default_device; + } else { + DYNET_ASSERT(dev_id > 0 && dev_id < (int)devices.size(), "Bad device id " << dev_id << " in Tensor::load with " << devices.size() << " total devices"); + device = devices[dev_id]; + } + device->allocate_tensor(mem_pool, *this); + if (device->type == DeviceType::CPU) { + ar & boost::serialization::make_array(v, d.size()); + } else { +#ifdef HAVE_CUDA + if (device->type == DeviceType::GPU) { + float* vc = static_cast(std::malloc(d.size() * sizeof(float))); + ar & boost::serialization::make_array(vc, d.size()); + CUDA_CHECK(cudaMemcpyAsync(v, vc, d.size() * sizeof(float), cudaMemcpyHostToDevice)); + free(vc); + } +#endif + } +} +DYNET_SAVELOAD_IMPL(Tensor) + +real rand01() { + uniform_real_distribution distribution(0, 1); + return distribution(*rndeng); +} + +int rand0n(int n) { + if (n <= 0) throw std::runtime_error("Integer upper bound is non-positive"); + int x = rand01() * n; + while (n == x) { x = rand01() * n; } + return x; +} + +real rand_normal() { + normal_distribution distribution(0, 1); + return distribution(*rndeng); +} + +#endif + +// ---- CPU/GPU operations +// TODO: would like to get rid of all the verbose code dispatching o the appropriate device + +template +void TensorTools::constant_dev(MyDevice & dev, Tensor& d, float c) { + d.tvec().device(*dev.edevice) = d.tvec().constant(c); +} +#ifdef __CUDACC__ +template void TensorTools::constant_dev(Device_GPU & dev, Tensor& d, float c); +#else +template void TensorTools::constant_dev(Device_CPU & dev, Tensor& d, float c); +#ifdef HAVE_CUDA +extern template void TensorTools::constant_dev(Device_GPU & dev, Tensor& d, float c); +void TensorTools::constant(Tensor& d, float c) { + if (d.device->type == DeviceType::CPU) { return constant_dev(*(Device_CPU*)d.device, d, c); } + else if (d.device->type == DeviceType::GPU) { return constant_dev(*(Device_GPU*)d.device, d, 
c); } + else { throw std::runtime_error("Bad device type"); } +} +#else +void TensorTools::constant(Tensor& d, float c) { + if (d.device->type == DeviceType::CPU) { return constant_dev(*(Device_CPU*)d.device, d, c); } + else { throw std::runtime_error("Bad device type"); } +} +#endif +#endif + +template +void TensorTools::clip_dev(MyDevice & dev, Tensor& d, float left, float right) { + d.tvec().device(*dev.edevice) = d.tvec().cwiseMax(left).cwiseMin(right); +} +#ifdef __CUDACC__ +template void TensorTools::clip_dev(Device_GPU & dev, Tensor& d, float left, float right); +#else +template void TensorTools::clip_dev(Device_CPU & dev, Tensor& d, float left, float right); +#ifdef HAVE_CUDA +extern template void TensorTools::clip_dev(Device_GPU & dev, Tensor& d, float left, float right); +void TensorTools::clip(Tensor& d, float left, float right) { + if (d.device->type == DeviceType::CPU) { return clip_dev(*(Device_CPU*)d.device, d, left, right); } + else if (d.device->type == DeviceType::GPU) { return clip_dev(*(Device_GPU*)d.device, d, left, right); } + else { throw std::runtime_error("Bad device type"); } +} +#else +void TensorTools::clip(Tensor& d, float left, float right) { + if (d.device->type == DeviceType::CPU) { return clip_dev(*(Device_CPU*)d.device, d, left, right); } + else { throw std::runtime_error("Bad device type"); } +} +#endif +#endif + +template +IndexTensor TensorTools::argmax_dev(MyDevice & dev, const Tensor& v, unsigned dim, unsigned num) { + if(num > 1) + DYNET_RUNTIME_ERR("Currently do not support num > 1 in argmax"); + DYNET_ARG_CHECK(v.mem_pool != DeviceMempool::NONE, "Input Tensor to TensorTools::argmax must be associated with a memory pool."); + Dim ids_dim = v.d; ids_dim.d[dim] = num; + IndexTensor ids(ids_dim, nullptr, v.device, v.mem_pool); + AlignedMemoryPool* pool = v.device->pools[(size_t)v.mem_pool]; + ids.v = static_cast(pool->allocate(ids_dim.size() * sizeof(Eigen::DenseIndex))); + ids.tb<3>().device(*dev.edevice) = 
v.tb<4>().argmax(dim); + return ids; +} +#ifdef __CUDACC__ +template IndexTensor TensorTools::argmax_dev(Device_GPU & dev, const Tensor& d, unsigned dim, unsigned num); +#else +template IndexTensor TensorTools::argmax_dev(Device_CPU & dev, const Tensor& d, unsigned dim, unsigned num); +#ifdef HAVE_CUDA +extern template IndexTensor TensorTools::argmax_dev(Device_GPU & dev, const Tensor& d, unsigned dim, unsigned num); +IndexTensor TensorTools::argmax(const Tensor& d, unsigned dim, unsigned num) { + if (d.device->type == DeviceType::CPU) { return argmax_dev(*(Device_CPU*)d.device, d, dim, num); } + else if (d.device->type == DeviceType::GPU) { return argmax_dev(*(Device_GPU*)d.device, d, dim, num); } + else { throw std::runtime_error("Bad device type"); } +} +#else +IndexTensor TensorTools::argmax(const Tensor& d, unsigned dim, unsigned num) { + if (d.device->type == DeviceType::CPU) { return argmax_dev(*(Device_CPU*)d.device, d, dim, num); } + else { throw std::runtime_error("Bad device type"); } +} +#endif +#endif + +template +IndexTensor TensorTools::categorical_sample_log_prob_dev(MyDevice & dev, const Tensor& v, unsigned dim, unsigned num) { + if(num > 1) + DYNET_RUNTIME_ERR("Currently do not support num > 1 in categorical_sample_log_prob"); + DYNET_ARG_CHECK(v.mem_pool != DeviceMempool::NONE, "Input Tensor to TensorTools::categorical_sample_log_prob must be associated with a memory pool."); + Dim ids_dim = v.d; ids_dim.d[dim] = num; + IndexTensor ids(ids_dim, nullptr, v.device, v.mem_pool); + AlignedMemoryPool* pool = v.device->pools[(size_t)v.mem_pool]; // same pool the input lives in; cast matches argmax_dev + ids.v = static_cast(pool->allocate(ids_dim.size() * sizeof(Eigen::DenseIndex))); + size_t used = pool->used(); // remember the pool mark so the scratch tensor below can be released afterwards + Dim copy_dim = v.d; // TODO: make this match num to enable num + Tensor copy(copy_dim, nullptr, v.device, v.mem_pool); + copy.v = static_cast(pool->allocate(v.d.size() * sizeof(float))); + TensorTools::randomize_uniform(copy); // default bounds are declared elsewhere -- presumably (0,1); verify in tensor.h + ids.tb<3>().device(*dev.edevice) = (v.tb<4>() - (-copy.tb<4>().log()).log()).argmax(dim); // argmax over v - log(-log(u)) with u the uniform noise + 
pool->set_used(used); + return ids; +} +#ifdef __CUDACC__ +template IndexTensor TensorTools::categorical_sample_log_prob_dev(Device_GPU & dev, const Tensor& d, unsigned dim, unsigned num); +#else +template IndexTensor TensorTools::categorical_sample_log_prob_dev(Device_CPU & dev, const Tensor& d, unsigned dim, unsigned num); +#ifdef HAVE_CUDA +extern template IndexTensor TensorTools::categorical_sample_log_prob_dev(Device_GPU & dev, const Tensor& d, unsigned dim, unsigned num); +IndexTensor TensorTools::categorical_sample_log_prob(const Tensor& d, unsigned dim, unsigned num) { + if (d.device->type == DeviceType::CPU) { return categorical_sample_log_prob_dev(*(Device_CPU*)d.device, d, dim, num); } + else if (d.device->type == DeviceType::GPU) { return categorical_sample_log_prob_dev(*(Device_GPU*)d.device, d, dim, num); } + else { throw std::runtime_error("Bad device type"); } +} +#else +IndexTensor TensorTools::categorical_sample_log_prob(const Tensor& d, unsigned dim, unsigned num) { + if (d.device->type == DeviceType::CPU) { return categorical_sample_log_prob_dev(*(Device_CPU*)d.device, d, dim, num); } + else { throw std::runtime_error("Bad device type"); } +} +#endif +#endif + +} // namespace dynet + diff --git a/thirdparty/dynet/dynet/tensor.h b/thirdparty/dynet/dynet/tensor.h new file mode 100644 index 000000000..b6a23e337 --- /dev/null +++ b/thirdparty/dynet/dynet/tensor.h @@ -0,0 +1,742 @@ +/** + * \file tensor.h + * \defgroup tensor tensor + * + */ + +#ifndef DYNET_EIGEN_TENSOR_H +#define DYNET_EIGEN_TENSOR_H + +#include +#include +#include +#include + +#include "dynet/dim.h" +#include "dynet/except.h" +#include "dynet/aligned-mem-pool.h" +#include "dynet/devices.h" +#include "dynet/io-macros.h" + +#if HAVE_CUDA +#include +#include +#include "dynet/cuda.h" +#endif + +// Following line is commented out because it causes errors with large nets (Antonis) +//#define EIGEN_NO_MALLOC + +#ifndef __CUDACC__ +#include +#endif + +#include + +namespace dynet { + 
+#define EIGEN_BACKEND 1 + +/** + * \ingroup tensor + * \typedef Represents a scalar + */ +typedef float real; + +/** + * \ingroup tensor + * \brief Represents a tensor of any order + * \details This provides a bridge between classic C++ types and Eigen tensors. + * + */ +struct Tensor { + /** + * \brief Create an empty tensor + */ + Tensor() : d(Dim()), v(nullptr), device(nullptr), mem_pool(DeviceMempool::NONE) { } + /** + * \brief Creates a tensor + * \details [long description] + * + * \param d Shape of the tensor + * \param v Pointer to the values + * \param dev Device + * \param mem Memory pool + */ + Tensor(const Dim& d, float* v, Device* dev, DeviceMempool mem) : d(d), v(v), device(dev), mem_pool(mem) {} + /** + * \brief Get the data as an Eigen matrix + * \return Eigen matrix + */ + Eigen::Map operator*() { + DYNET_ARG_CHECK((d.batch_elems() == 1 && d.ndims() < 3), + "Attempted to access Tensor with more than one batch element or more than two dimensions in matrix form: " << d); + return Eigen::Map(v, d.rows(), d.cols()); + } + const Eigen::Map operator*() const { + DYNET_ARG_CHECK((d.batch_elems() == 1 && d.ndims() < 3), + "Attempted to access Tensor with more than one batch element or more than two dimensions in matrix form: " << d); + return Eigen::Map(v, d.rows(), d.cols()); + } + /** + * \brief Get the data as an Eigen vector + * \details This returns the full tensor contents even if it has many dimensions + * \return Flattened tensor + */ + Eigen::Map vec() { + return Eigen::Map(v, d.size()); + } + const Eigen::Map vec() const { + return Eigen::Map(v, d.size()); + } + /** + * \brief Get the data as an order 1 Eigen tensor + * \details this returns the full tensor contents as a one dimensional Eigen tensor which can be used for on-device processing where dimensions aren't important + * \return Eigen order 1 tensor + */ + Eigen::TensorMap> tvec() { + return Eigen::TensorMap>(v, d.size()); + } + const Eigen::TensorMap> tvec() const { + return 
Eigen::TensorMap>(v, d.size()); + } + /** + * \brief Get the data as an order 2 tensor including batch size + * \details this returns the full tensor contents as a two dimensional Eigen tensor where the first dimension is a flattened representation of each batch and the second dimension is the batches + * \return batch size x elements per batch matrix + */ + Eigen::TensorMap> tbvec() { + return Eigen::TensorMap>(v, d.batch_size(), d.batch_elems()); + } + const Eigen::TensorMap> tbvec() const { + return Eigen::TensorMap>(v, d.batch_size(), d.batch_elems()); + } + // Get view as an Eigen Tensor (see specializations below -- this is to work around Eigen's and DyNet's compile-time vs. run-time differences) + /** + * \brief Get view as a Tensor + * \tparam Order Tensor order. Order 0 through 4 are already implemented for you + * \return Eigen Tensor of the given order + */ + template Eigen::TensorMap> t(); + template const Eigen::TensorMap> t() const; + + /** + * \brief Get view as an Eigen Tensor where the final dimension is the various batches + * \tparam Order Tensor order. 
Order 0 through 4 are already implemented for you + * \return Eigen Tensor of the given order + 1 + */ + template Eigen::TensorMap < Eigen::Tensor < float, Order + 1 >> tb(); + template const Eigen::TensorMap < Eigen::Tensor < float, Order + 1 >> tb() const; + + /** + * \brief Get the pointer for a particular batch + * \details Automatically broadcasts when the tensor has a single batch element (`d.bd == 1`) + * + * \param bid Batch id requested + * \return Pointer to the memory where the batch values are located + */ + float* batch_ptr(unsigned bid) { + DYNET_ASSERT(d.bd == 1 || bid < d.bd, "Batch index out of bounds in batch_ptr: index=" << bid << ", dim=" << d); + return v + (bid % d.bd) * d.batch_size(); + } + const float* batch_ptr(unsigned bid) const { + DYNET_ASSERT(d.bd == 1 || bid < d.bd, "Batch index out of bounds in batch_ptr: index=" << bid << ", dim=" << d); + return v + (bid % d.bd) * d.batch_size(); + } + /** + * \brief Get the matrix for a particular batch + * \details Automatically broadcasts when the tensor has a single batch element (`d.bd == 1`). 
+ * + * \param bid Batch id requested + * \return Matrix at batch id `bid` (of shape `d.rows()` x `d.cols()`) + */ + Eigen::Map batch_matrix(unsigned bid) { + return Eigen::Map(v + (bid % d.bd) * d.batch_size(), d.rows(), d.cols()); + } + const Eigen::Map batch_matrix(unsigned bid) const { + return Eigen::Map(v + (bid % d.bd) * d.batch_size(), d.rows(), d.cols()); + } + /** + * \brief Get the data as a matrix, where each "row" is the concatenation of rows and columns, and each "column" is batches + * \return matrix of shape `d.rows() * d.cols()` x `d.batch_elems()` + */ + Eigen::Map rowcol_matrix() { + return Eigen::Map(v, d.rows() * d.cols(), d.batch_elems()); + } + const Eigen::Map rowcol_matrix() const { + return Eigen::Map(v, d.rows() * d.cols(), d.batch_elems()); + } + + /** + * \brief Get the data as a matrix, where each "row" is the concatenation of rows, and each "column" is the concatenation of columns and batches + * \return matrix of shape `d.rows()` x `d.cols() * d.batch_elems()` + */ + Eigen::Map colbatch_matrix() { + return Eigen::Map(v, d.rows(), d.cols() * d.batch_elems()); + } + const Eigen::Map colbatch_matrix() const { + return Eigen::Map(v, d.rows(), d.cols() * d.batch_elems()); + } + + /** + * \brief Check for NaNs and infinite values + * \details This is very slow: use sparingly (it's linear in the number of elements). 
This raises a `std::runtime_error` exception if the Tensor is on GPU because it's not implemented yet + * \return `true` if the tensor is on CPU and contains no NaN or infinite value, `false` otherwise + */ + inline bool is_valid() const { + // TODO : replace this with a custom exception + if (device->type == DeviceType::CPU) { + const size_t s = d.size(); + for (unsigned i = 0; i < s; ++i) + if (std::isnan(v[i]) || std::isinf(v[i])) return false; + return true; + } else { +#if HAVE_CUDA + if (device->type == DeviceType::GPU) { + throw std::runtime_error("is_valid() not implemented on GPU"); + } +#endif + } + return false; + } + + /** + * \brief Get a Tensor object representing a single batch. + * \details If this tensor only has a single batch, then broadcast. Otherwise, check to make sure that the requested batch is smaller than the number of batches. + * + * TODO: This is a bit wasteful, as it re-calculates `d.batch_size()` every time. + * + * \param b Batch id + * \return Sub tensor at batch `b` + */ + Tensor batch_elem(unsigned b) const { + if (d.batch_elems() == 1) { + return *this; + } else { + if (b >= d.batch_elems()) { + std::stringstream ss; + ss << "Requested batch id " << b << " is greater than the number of batch " << d.batch_elems(); + throw std::runtime_error(ss.str()); + } + const unsigned bsize = d.batch_size(); + Dim new_d(d); new_d.bd = 1; + Tensor ret(new_d, v + bsize * b, device, mem_pool); + return ret; + } + } + + // get tensors for all batches + /** + * \brief Get tensors for all batches + * \return List of the tensors in each batch + */ + std::vector batch_elems() const { + if (d.batch_elems() == 1) { + return std::vector(1, *this); + } else { + std::vector bs(d.batch_elems()); + unsigned bsize = d.batch_size(); + Dim new_d = d; new_d.bd = 1; + for (unsigned b = 0; b < d.batch_elems(); ++b) + bs[b] = Tensor(new_d, v + bsize * b, device, mem_pool); + return bs; + } + } + + Dim d; /**< Shape of tensor */ + float* v; /**< Pointer to memory */ + Device* device; + DeviceMempool 
mem_pool; + +private: + DYNET_SERIALIZE_SPLIT_DECLARE() +}; + +template<> inline Eigen::TensorMap> Tensor::t<0>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.size() == 1, "Illegal access of tensor in function t<0>(): dim=" << d); + return Eigen::TensorMap>(v); +} +template<> inline const Eigen::TensorMap> Tensor::t<0>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.size() == 1, "Illegal access of tensor in function t<0>(): dim=" << d); + return Eigen::TensorMap>(v); +} +template<> inline Eigen::TensorMap> Tensor::t<1>() { + DYNET_ASSERT(d.batch_elems() == 1 && (d.ndims() == 1 || d.size() == d.rows()), "Illegal access of tensor in function t<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0]); +} +template<> inline const Eigen::TensorMap> Tensor::t<1>() const { + DYNET_ASSERT(d.batch_elems() == 1 && (d.ndims() == 1 || d.size() == d.rows()), "Illegal access of tensor in function t<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0]); +} +template<> inline Eigen::TensorMap> Tensor::t<2>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 2, "Illegal access of tensor in function t<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1]); + else return Eigen::TensorMap>(v, (int)d[0], (int)1); +} +template<> inline const Eigen::TensorMap> Tensor::t<2>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 2, "Illegal access of tensor in function t<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1]); + else return Eigen::TensorMap>(v, (int)d[0], (int)1); +} +template<> inline Eigen::TensorMap> Tensor::t<3>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 3, "Illegal access of tensor in function t<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2]); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1); +} +template<> inline 
const Eigen::TensorMap> Tensor::t<3>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 3, "Illegal access of tensor in function t<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2]); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1); +} +template<> inline Eigen::TensorMap> Tensor::t<4>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 4, "Illegal access of tensor in function t<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3]); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1); +} +template<> inline const Eigen::TensorMap> Tensor::t<4>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 4, "Illegal access of tensor in function t<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3]); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1); +} +// ... 
+ +template<> inline Eigen::TensorMap> Tensor::tb<0>() { + DYNET_ASSERT(d.batch_size() == 1, "Illegal access of tensor in function tb<0>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d.bd); +} +template<> inline const Eigen::TensorMap> Tensor::tb<0>() const { + DYNET_ASSERT(d.batch_size() == 1, "Illegal access of tensor in function tb<0>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d.bd); +} +template<> inline Eigen::TensorMap> Tensor::tb<1>() { + DYNET_ASSERT(d.ndims() == 1 || d.batch_size() == d.rows(), "Illegal access of tensor in function tb<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0], (int)d.bd); +} +template<> inline const Eigen::TensorMap> Tensor::tb<1>() const { + DYNET_ASSERT(d.ndims() == 1 || d.batch_size() == d.rows(), "Illegal access of tensor in function tb<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0], (int)d.bd); +} +template<> inline Eigen::TensorMap> Tensor::tb<2>() { + DYNET_ASSERT(d.ndims() <= 2, "Illegal access of tensor in function tb<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)d.bd); +} +template<> inline const Eigen::TensorMap> Tensor::tb<2>() const { + DYNET_ASSERT(d.ndims() <= 2, "Illegal access of tensor in function tb<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)d.bd); +} +template<> inline Eigen::TensorMap> Tensor::tb<3>() { + DYNET_ASSERT(d.ndims() <= 3, "Illegal access of tensor in function tb<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)d.bd); +} +template<> inline const Eigen::TensorMap> Tensor::tb<3>() const { + DYNET_ASSERT(d.ndims() 
<= 3, "Illegal access of tensor in function tb<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)d.bd); +} +template<> inline Eigen::TensorMap> Tensor::tb<4>() { + DYNET_ASSERT(d.ndims() <= 4, "Illegal access of tensor in function tb<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3], (int)d.bd); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1, (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1, (int)d.bd); +} +template<> inline const Eigen::TensorMap> Tensor::tb<4>() const { + DYNET_ASSERT(d.ndims() <= 4, "Illegal access of tensor in function tb<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3], (int)d.bd); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1, (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1, (int)d.bd); +} +// ... + +/** + * \ingroup tensor + * \brief You can use `cout< as_vector(const Tensor& v); + +/** + * \ingroup tensor + * \brief Represents a tensor of indices + * \details This holds indices to locations within a dimension or tensor. 
+ * + */ +struct IndexTensor { + /** + * \brief Create an empty tensor + */ + IndexTensor() : d(Dim()), v(nullptr), device(nullptr), mem_pool(DeviceMempool::NONE) { } + /** + * \brief Creates a tensor + * \details [long description] + * + * \param d Shape of the tensor + * \param v Pointer to the values + * \param dev Device + * \param mem Memory pool + */ + IndexTensor(const Dim& d, Eigen::DenseIndex* v, Device* dev, DeviceMempool mem) : d(d), v(v), device(dev), mem_pool(mem) {} + + // Get view as an Eigen Tensor (see specializations below-- this is to work Eigen's and DYNETs compile-type vs. run-time differences) + /** + * \brief Get view as a Tensor + * \tparam Order Tensor order. Order 0 through 4 are already implemented for you + * \return Eigen Tensor of the given order + */ + template Eigen::TensorMap> t(); + template const Eigen::TensorMap> t() const; + + /** + * \brief Get view as an Eigen Tensor where the final dimension is the various batches + * \tparam Order Tensor order. Order 0 through 4 are already implemented for you + * \return Eigen Tensor of the given order + 1 + */ + template Eigen::TensorMap < Eigen::Tensor < Eigen::DenseIndex, Order + 1 >> tb(); + template const Eigen::TensorMap < Eigen::Tensor < Eigen::DenseIndex, Order + 1 >> tb() const; + + Dim d; /**< Shape of tensor */ + Eigen::DenseIndex* v; /**< Pointer to memory */ + Device* device; + DeviceMempool mem_pool; + +private: + DYNET_SERIALIZE_SPLIT_DECLARE() +}; + +template<> inline Eigen::TensorMap> IndexTensor::t<0>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.size() == 1, "Illegal access of tensor in function t<0>(): dim=" << d); + return Eigen::TensorMap>(v); +} +template<> inline const Eigen::TensorMap> IndexTensor::t<0>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.size() == 1, "Illegal access of tensor in function t<0>(): dim=" << d); + return Eigen::TensorMap>(v); +} +template<> inline Eigen::TensorMap> IndexTensor::t<1>() { + DYNET_ASSERT(d.batch_elems() == 1 && (d.ndims() == 
1 || d.size() == d.rows()), "Illegal access of tensor in function t<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0]); +} +template<> inline const Eigen::TensorMap> IndexTensor::t<1>() const { + DYNET_ASSERT(d.batch_elems() == 1 && (d.ndims() == 1 || d.size() == d.rows()), "Illegal access of tensor in function t<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0]); +} +template<> inline Eigen::TensorMap> IndexTensor::t<2>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 2, "Illegal access of tensor in function t<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1]); + else return Eigen::TensorMap>(v, (int)d[0], (int)1); +} +template<> inline const Eigen::TensorMap> IndexTensor::t<2>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 2, "Illegal access of tensor in function t<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1]); + else return Eigen::TensorMap>(v, (int)d[0], (int)1); +} +template<> inline Eigen::TensorMap> IndexTensor::t<3>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 3, "Illegal access of tensor in function t<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2]); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1); +} +template<> inline const Eigen::TensorMap> IndexTensor::t<3>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 3, "Illegal access of tensor in function t<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2]); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1); +} +template<> inline Eigen::TensorMap> IndexTensor::t<4>() { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 4, "Illegal access of tensor in function t<4>(): 
dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3]); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1); +} +template<> inline const Eigen::TensorMap> IndexTensor::t<4>() const { + DYNET_ASSERT(d.batch_elems() == 1 && d.ndims() <= 4, "Illegal access of tensor in function t<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3]); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1); +} +// ... + +template<> inline Eigen::TensorMap> IndexTensor::tb<0>() { + DYNET_ASSERT(d.batch_size() == 1, "Illegal access of tensor in function tb<0>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d.bd); +} +template<> inline const Eigen::TensorMap> IndexTensor::tb<0>() const { + DYNET_ASSERT(d.batch_size() == 1, "Illegal access of tensor in function tb<0>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d.bd); +} +template<> inline Eigen::TensorMap> IndexTensor::tb<1>() { + DYNET_ASSERT(d.ndims() == 1 || d.batch_size() == d.rows(), "Illegal access of tensor in function tb<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0], (int)d.bd); +} +template<> inline const Eigen::TensorMap> IndexTensor::tb<1>() const { + DYNET_ASSERT(d.ndims() == 1 || d.batch_size() == d.rows(), "Illegal access of tensor in function tb<1>(): dim=" << d); + return Eigen::TensorMap>(v, (int)d[0], (int)d.bd); +} +template<> inline Eigen::TensorMap> IndexTensor::tb<2>() { + DYNET_ASSERT(d.ndims() <= 2, "Illegal access of tensor in function tb<2>(): dim=" << d); + if (d.ndims() == 
2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)d.bd); +} +template<> inline const Eigen::TensorMap> IndexTensor::tb<2>() const { + DYNET_ASSERT(d.ndims() <= 2, "Illegal access of tensor in function tb<2>(): dim=" << d); + if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)d.bd); +} +template<> inline Eigen::TensorMap> IndexTensor::tb<3>() { + DYNET_ASSERT(d.ndims() <= 3, "Illegal access of tensor in function tb<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)d.bd); +} +template<> inline const Eigen::TensorMap> IndexTensor::tb<3>() const { + DYNET_ASSERT(d.ndims() <= 3, "Illegal access of tensor in function tb<3>(): dim=" << d); + if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)d.bd); +} +template<> inline Eigen::TensorMap> IndexTensor::tb<4>() { + DYNET_ASSERT(d.ndims() <= 4, "Illegal access of tensor in function tb<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3], (int)d.bd); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1, (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1, (int)d.bd); +} +template<> inline const Eigen::TensorMap> IndexTensor::tb<4>() const { + DYNET_ASSERT(d.ndims() <= 4, "Illegal access of tensor in function 
tb<4>(): dim=" << d); + if (d.ndims() == 4) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)d[3], (int)d.bd); + else if (d.ndims() == 3) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)d[2], (int)1, (int)d.bd); + else if (d.ndims() == 2) return Eigen::TensorMap>(v, (int)d[0], (int)d[1], (int)1, (int)1, (int)d.bd); + else return Eigen::TensorMap>(v, (int)d[0], (int)1, (int)1, (int)1, (int)d.bd); +} +// ... + +/** + * \ingroup tensor + * \brief Get the array of indices in an index tensor + * \details For higher order tensors this returns the flattened value + * + * \param v Input index tensor + * \return Index values + */ +std::vector as_vector(const IndexTensor& v); + +/** + * \ingroup tensor + * \brief Provides tools for creating, accessing, copying and modifying tensors (in-place) + * + */ +struct TensorTools { + /** + * \brief Clip the values in the tensor to a fixed range + * + * \param d Tensor to modify + * \param left Target minimum value + * \param right Target maximum value + */ + static void clip(Tensor& d, float left, float right); + /** + * \brief Fills the tensor with a constant value + * + * \param d Tensor to modify + * \param c Target value + */ + static void constant(Tensor& d, float c); + /** + * \brief Fills a tensor with zeros + * + * \param d Input tensor + */ + static void zero(Tensor& d); + /** + * \brief Set the (order 2) tensor as the identity matrix + * \details this throws a runtime_error exception if the tensor isn't a square matrix + * + * \param val Input tensor + */ + static void identity(Tensor& val); + // + /** + * \brief Fill the tensor with bernoulli random variables and scale them by scale + * + * \param val Input tensor + * \param p Parameter of the bernoulli distribution + * \param scale Scale of the random variables + */ + static void randomize_bernoulli(Tensor& val, real p, real scale = 1.0f); + /** + * \brief Fill the tensor with gaussian random variables + * + * \param val Input tensor + * \param 
mean Mean + * \param stddev Standard deviation + */ + static void randomize_normal(Tensor& val, real mean = 0.0f, real stddev = 1.0f); + /** + * \brief Fill the tensor with uniform random variables + * + * \param val Input tensor + * \param left Left bound of the interval + * \param right Right bound of the interval + */ + static void randomize_uniform(Tensor& val, real left = 0.0f, real right = 0.0f); + /** + * \brief Takes a square matrix tensor and sets it as a random orthonormal matrix + * \details More specifically this samples a random matrix with RandomizeUniform and then performs SVD and returns the left orthonormal matrix in the decomposition, scaled by `scale` + * + * \param val Input tensor + * \param scale Value to which the resulting orthonormal matrix will be scaled + */ + static void randomize_orthonormal(Tensor& val, real scale = 1.0f); + /** + * \brief Access element of the tensor by index in the values array + * \details AccessElement and SetElement are very, very slow (potentially) - use appropriately + * + * \param v Tensor + * \param index Index in the memory + * + * \return `v.v[index]` + */ + static float access_element(const Tensor& v, int index); + /** + * \brief Access element of the tensor by indices in the various dimension + * \details This only works for matrix shaped tensors (+ batch dimension). 
AccessElement and SetElement are very, very slow (potentially) - use appropriately + * + * \param v Tensor + * \param index Indices in the tensor + * + * \return `(*v)(index[0], index[1])` + */ + static float access_element(const Tensor& v, const Dim& index); + /** + * \brief Set element of the tensor by index in the values array + * \details AccessElement and SetElement are very, very slow (potentially) - use appropriately + * + * \param v Tensor + * \param index Index in the memory + * \param value Desired value + */ + static void set_element(const Tensor& v, int index, float value); + /** + * \brief Copy element from one tensor to another (by index in the values array) + * + * \param l Source tensor + * \param lindex Source index + * \param r Target tensor + * \param rindex Target index + */ + static void copy_element(const Tensor& l, int lindex, Tensor& r, int rindex); + + /** + * \brief Set the elements of a tensor with an array of values + * \details (This uses memcpy so be careful) + * + * \param v Input Tensor + * \param vec Values + */ + static void set_elements(const Tensor& v, const std::vector& vec); + /** + * \brief Copy one tensor into another + * + * \param v Target tensor + * \param v_src Source tensor + */ + static void copy_elements(const Tensor& v, const Tensor& v_src); + + /** + * \brief Calculate the index of the maximum value + * + * \param v A tensor where each row represents a probability distribution + * \param dim Which dimension to take the argmax over + * \param num The number of kmax values + * + * \returns A newly allocated LongTensor consisting of argmax IDs. The length of the + * dimension "dim" will be "num", consisting of the appropriate IDs. 
+ */ + static IndexTensor argmax(const Tensor& v, unsigned dim = 0, unsigned num = 1); + + /** + * \brief Calculate samples from a log probability + * + * \param v A tensor where each row represents a log probability distribution + * \param dim Which dimension to take the sample over + * \param num The number of samples for each row + * + * \returns A newly allocated IndexTensor consisting of the sampled IDs. The length of the + * dimension "dim" will be "num", consisting of the appropriate IDs. + */ + static IndexTensor categorical_sample_log_prob(const Tensor& v, unsigned dim = 0, unsigned num = 1); + +protected: + template + static void clip_dev(MyDevice & dev, Tensor& d, float left, float right); + template + static void constant_dev(MyDevice & dev, Tensor& d, float c); + template + static IndexTensor argmax_dev(MyDevice & dev, const Tensor& v, unsigned dim = 0, unsigned num = 1); + template + static IndexTensor categorical_sample_log_prob_dev(MyDevice & dev, const Tensor& v, unsigned dim = 0, unsigned num = 1); + +}; + +/** + * \ingroup tensor + * \brief This is a helper function to sample uniformly in \f$[0,1]\f$ + * \return \f$x\sim\mathcal U([0,1])\f$ + */ +real rand01(); +/** + * \ingroup tensor + * \brief This is a helper function to sample uniformly in \f$\{0,\dots,n-1\}\f$ + * + * \param n Upper bound (excluded) + * \return \f$x\sim\mathcal U(\{0,\dots,n-1\})\f$ + */ +int rand0n(int n); +/** + * \ingroup tensor + * \brief This is a helper function to sample from a normalized gaussian distribution + * + * \return \f$x\sim\mathcal N(0,1)\f$ + */ +real rand_normal(); + +} // namespace dynet + +DYNET_VERSION_DEFINE(dynet::Tensor, 1) +#endif diff --git a/thirdparty/dynet/dynet/timing.h b/thirdparty/dynet/dynet/timing.h new file mode 100644 index 000000000..6f99334b4 --- /dev/null +++ b/thirdparty/dynet/dynet/timing.h @@ -0,0 +1,22 @@ +#ifndef _TIMING_H_ +#define _TIMING_H_ + +#include +#include +#include + +namespace dynet { + +struct Timer { + Timer(const 
std::string& msg) : msg(msg), start(std::chrono::high_resolution_clock::now()) {} + ~Timer() { + auto stop = std::chrono::high_resolution_clock::now(); + std::cerr << '[' << msg << ' ' << std::chrono::duration(stop-start).count() << " ms]\n"; + } + std::string msg; + std::chrono::high_resolution_clock::time_point start; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/dynet/training.cc b/thirdparty/dynet/dynet/training.cc new file mode 100644 index 000000000..686ddb60b --- /dev/null +++ b/thirdparty/dynet/dynet/training.cc @@ -0,0 +1,379 @@ +#include "dynet/training.h" + +#include +#include + +// #include "dynet/gpu-ops.h" +#include "dynet/param-nodes.h" +#include "dynet/weight-decay.h" + +// Macros for defining parameter update functions +#ifdef __CUDACC__ +#define DYNET_TRAINER_INST_DEV_IMPL(MyTrainer) \ + template void MyTrainer::update_rule_dev(const Device_GPU & dev, real scale, real gscale, const std::vector & values); +#elif defined(HAVE_CUDA) +// This is correct, but dying when models are read and written. 
+// if(values[0]->device->type == DeviceType::CPU) { update_rule_dev(*(Device_CPU*)values[0]->device,scale,gscale,values); } +// else if(values[0]->device->type == DeviceType::GPU) { update_rule_dev(*(Device_GPU*)values[0]->device,scale,gscale,values); } +// else { throw std::runtime_error("Bad device in MyTrainer::update_rule"); } +#define DYNET_TRAINER_INST_DEV_IMPL(MyTrainer) \ + extern template void MyTrainer::update_rule_dev(const Device_GPU & dev, real scale, real gscale, const std::vector & values); \ + template void MyTrainer::update_rule_dev(const Device_CPU & dev, real scale, real gscale, const std::vector & values); \ + void MyTrainer::update_rule(real scale, real gscale, const std::vector & values) { \ + if(default_device->type == DeviceType::CPU) { update_rule_dev(*(Device_CPU*)default_device,scale,gscale,values); } \ + else if(default_device->type == DeviceType::GPU) { update_rule_dev(*(Device_GPU*)default_device,scale,gscale,values); } \ + else { throw std::runtime_error("Bad device in MyTrainer::update_rule"); } \ + } +#else +#define DYNET_TRAINER_INST_DEV_IMPL(MyTrainer) \ + template void MyTrainer::update_rule_dev(const Device_CPU & dev, real scale, real gscale, const std::vector & values); \ + void MyTrainer::update_rule(real scale, real gscale, const std::vector & values) { \ + if(default_device->type == DeviceType::CPU) { update_rule_dev(*(Device_CPU*)default_device,scale,gscale,values); } \ + else { throw std::runtime_error("Bad device in MyTrainer::update_rule"); } \ + } +#endif + +namespace dynet { + +using namespace std; + +template +bool is_valid(const Eigen::MatrixBase& x) { + return ((x - x).array() == (x - x).array()).all(); +} + +// --- The actual update code for each operation, implemented on various devices + +// Trainer base class is run on CPUs +#ifndef __CUDACC__ + +Trainer::~Trainer() {} + +void Trainer::rescale_and_reset_weight_decay() { + const float weight_decay = model->weight_decay.current_weight_decay(); + const auto params 
= model->parameters_list(); + for (auto p : model->updated_parameters_list()) + params[p]->scale_parameters(weight_decay); + const auto lookup_params = model->lookup_parameters_list(); + for (auto p : model->updated_lookup_parameters_list()) + lookup_params[p]->scale_parameters(weight_decay); + model->weight_decay.reset_weight_decay(); +} + +float Trainer::clip_gradients(real scale) { + float gscale = 1; + if (clipping_enabled) { + // TODO should I handle updatable differently? + float gg = model->gradient_l2_norm(); + if (isnan(gg) || isinf(gg)) { + ostringstream oss; oss << "Magnitude of gradient is bad: " << gg; + throw std::runtime_error(oss.str()); + } + if (scale * gg > clip_threshold) { + ++clips; + ++clips_since_status; + gscale = clip_threshold / (scale * gg); + } + } + return gscale; +} + +// this calls update on all of the parameters that are supposed to be updated +void Trainer::update(real scale) { + update(model->updated_parameters_list(), model->updated_lookup_parameters_list(), scale); +} + +// this calls the rule-specific updates over all updated parameters +void Trainer::update(const std::vector & upd_params, const std::vector & upd_lookup_params, real scale) { + // Allocate if necessary + if(!aux_allocated) { + alloc_impl(); + aux_allocated = true; + } + + // Perform gradient clipping and cycle through parameters + const float gscale = clip_gradients(scale); + const auto & params = model->parameters_list(); + for(auto i : upd_params) { + update_params(scale, gscale, i); + params[i]->clear(); + } + const auto & lookup_params = model->lookup_parameters_list(); + for(auto i : upd_lookup_params) { + if(sparse_updates_enabled && !lookup_params[i]->all_updated) { + for (auto j : lookup_params[i]->non_zero_grads) + update_lookup_params(scale, gscale, i, j); + } else { + update_lookup_params(scale, gscale, i); + } + lookup_params[i]->clear(); + } + ++updates; + ++updates_since_status; + + model->weight_decay.update_weight_decay(); // update global 
weight scale + if (model->weight_decay.parameters_need_rescaled()) + rescale_and_reset_weight_decay(); // if wdscale is getting to small multiply all weights by wdscale, and set wdscale to 1 +} + +#endif + +// --- SimpleSGDTrainer + +// Perform update of ts[0]=parameters, ts[1]=gradients +template +void SimpleSGDTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[0]->tvec().device(*dev.edevice) -= ts[1]->tvec() * (eta * scale * gscale / model->weight_decay.current_weight_decay()); +} +DYNET_TRAINER_INST_DEV_IMPL(SimpleSGDTrainer) + +#ifndef __CUDACC__ +void SimpleSGDTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + update_rule(scale, gscale, {&p->values, &p->g}); +} +void SimpleSGDTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], &p->grads[lidx]}); +} +void SimpleSGDTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads}); +} +#endif + +// --- CyclicalSGDTrainer + +// Perform update of ts[0]=parameters, ts[1]=gradients +template +void CyclicalSGDTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[0]->tvec().device(*dev.edevice) -= ts[1]->tvec() * (eta * scale * gscale / model->weight_decay.current_weight_decay()); +} +DYNET_TRAINER_INST_DEV_IMPL(CyclicalSGDTrainer) + +#ifndef __CUDACC__ +void CyclicalSGDTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + update_rule(scale, gscale, {&p->values, &p->g}); +} +void CyclicalSGDTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], 
&p->grads[lidx]}); +} +void CyclicalSGDTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads}); +} +#endif + +// --- MomentumSGDTrainer + +// Perform update of ts[0]=parameters, ts[1]=gradients, ts[2]=momentum +template +void MomentumSGDTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[2]->tvec().device(*dev.edevice) = ts[2]->tvec() * momentum - ts[1]->tvec() * (eta * scale * gscale); + ts[0]->tvec().device(*dev.edevice) += ts[2]->tvec() / model->weight_decay.current_weight_decay(); +} +DYNET_TRAINER_INST_DEV_IMPL(MomentumSGDTrainer) + +#ifndef __CUDACC__ +void MomentumSGDTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + update_rule(scale, gscale, {&p->values, &p->g, &vp[idx].h}); +} +void MomentumSGDTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], &p->grads[lidx], &vlp[idx].h[lidx]}); +} +void MomentumSGDTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads, &vlp[idx].all_h}); +} +void MomentumSGDTrainer::alloc_impl() { + vp = allocate_shadow_parameters(*model); + vlp = allocate_shadow_lookup_parameters(*model); +} +#endif + +// --- AdagradTrainer + +// Perform update of ts[0]=parameters, ts[1]=gradients, ts[2]=stddev +template +void AdagradTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[1]->tvec().device(*dev.edevice) = ts[1]->tvec() * (scale * gscale); + ts[2]->tvec().device(*dev.edevice) += ts[1]->tvec().square(); + ts[0]->tvec().device(*dev.edevice) += ts[1]->tvec() / (ts[2]->tvec() + epsilon).sqrt() * (-eta / 
model->weight_decay.current_weight_decay()); +} +DYNET_TRAINER_INST_DEV_IMPL(AdagradTrainer) + +#ifndef __CUDACC__ +void AdagradTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + update_rule(scale, gscale, {&p->values, &p->g, &vp[idx].h}); +} +void AdagradTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], &p->grads[lidx], &vlp[idx].h[lidx]}); +} +void AdagradTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads, &vlp[idx].all_h}); +} +void AdagradTrainer::alloc_impl() { + vp = allocate_shadow_parameters(*model); + vlp = allocate_shadow_lookup_parameters(*model); +} +#endif + +// --- AdadeltaTrainer + +// Perform update of ts[0]=parameters, ts[1]=gradients, ts[2]=hg, ts[3]=hd +template +void AdadeltaTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[1]->tvec().device(*dev.edevice) = ts[1]->tvec() * (scale * gscale); + ts[2]->tvec().device(*dev.edevice) = ts[2]->tvec() * rho + ts[1]->tvec().square() * (1.f - rho); + ts[1]->tvec().device(*dev.edevice) = - ts[1]->tvec() * (ts[3]->tvec() + epsilon).sqrt() / (ts[2]->tvec() + epsilon).sqrt(); + ts[3]->tvec().device(*dev.edevice) = ts[3]->tvec() * rho + ts[1]->tvec().square() * (1.f - rho); + ts[0]->tvec().device(*dev.edevice) += ts[1]->tvec() / model->weight_decay.current_weight_decay(); +} +DYNET_TRAINER_INST_DEV_IMPL(AdadeltaTrainer) + +#ifndef __CUDACC__ +void AdadeltaTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + update_rule(scale, gscale, {&p->values, &p->g, &hg[idx].h, &hd[idx].h}); +} +void AdadeltaTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = 
model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], &p->grads[lidx], &hlg[idx].h[lidx], &hld[idx].h[lidx]}); +} +void AdadeltaTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads, &hlg[idx].all_h, &hld[idx].all_h}); +} +void AdadeltaTrainer::alloc_impl() { + hg = allocate_shadow_parameters(*model); + hlg = allocate_shadow_lookup_parameters(*model); + hd = allocate_shadow_parameters(*model); + hld = allocate_shadow_lookup_parameters(*model); +} +#endif + +// --- RMSPropTrainer +// TODO: This is not finished yet, because it memorizes a scalar for each set of parameters, not each parameter itself. +// We could implement this with one tensor for each scalar, but this is pretty wasteful + +// Perform update of ts[0]=parameters, ts[1]=gradients +template +void RMSPropTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[1]->tvec().device(*dev.edevice) = ts[1]->tvec() * (scale * gscale); // Scale gradient + ts[2]->tvec().device(*dev.edevice) = ts[2]->tvec() * rho + ts[1]->tvec().square() * (1.f - rho); // Update square gradient exponential average + ts[1]->tvec().device(*dev.edevice) = - ts[1]->tvec() / (ts[2]->tvec() + epsilon).sqrt(); // Divide by the RMS + ts[0]->tvec().device(*dev.edevice) += eta * ts[1]->tvec() / model->weight_decay.current_weight_decay(); // Apply weight decay (should we do this?) 
+ // real& d2 = hg[pi++]; + // real g2 = p->g.vec().squaredNorm(); + // d2 = rho * d2 + (1.f - rho) * g2; + // p->values.vec() -= ((eta * scale * gscale / sqrt(d2 + epsilon)) * p->g.vec()) / model->weight_decay.current_weight_decay(); +} +DYNET_TRAINER_INST_DEV_IMPL(RMSPropTrainer) + +#ifndef __CUDACC__ +void RMSPropTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + update_rule(scale, gscale, {&p->values, &p->g, &hmsg[idx].h}); +} +void RMSPropTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], &p->grads[lidx], &hlmsg[idx].h[lidx]}); +} +void RMSPropTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads, &hlmsg[idx].all_h}); +} +void RMSPropTrainer::alloc_impl() { + hmsg = allocate_shadow_parameters(*model); + hlmsg = allocate_shadow_lookup_parameters(*model); +} +#endif + +// --- AdamTrainer + +// Perform update of ts[0]=parameters, ts[1]=gradients, ts[2]=mean, ts[3]=variance +template +void AdamTrainer::update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & ts) { + ts[1]->tvec().device(*dev.edevice) = ts[1]->tvec() * (scale * gscale); + ts[2]->tvec().device(*dev.edevice) = ts[2]->tvec() * beta_1 + ts[1]->tvec() * (1.f - beta_1); + ts[3]->tvec().device(*dev.edevice) = ts[3]->tvec() * beta_2 + ts[1]->tvec().square() * (1.f - beta_2); + float lr_t = eta * sqrt(1-pow(beta_2, updates+1))/(1-pow(beta_1, updates+1))/ model->weight_decay.current_weight_decay(); + ts[0]->tvec().device(*dev.edevice) -= ts[2]->tvec() / (ts[3]->tvec().sqrt() + epsilon) * lr_t; +} +DYNET_TRAINER_INST_DEV_IMPL(AdamTrainer) + +#ifndef __CUDACC__ +void AdamTrainer::update_params(real scale, real gscale, size_t idx) { + auto & p = model->parameters_list()[idx]; + 
update_rule(scale, gscale, {&p->values, &p->g, &m[idx].h, &v[idx].h}); +} +void AdamTrainer::update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->values[lidx], &p->grads[lidx], &lm[idx].h[lidx], &lv[idx].h[lidx]}); +} +void AdamTrainer::update_lookup_params(real scale, real gscale, size_t idx) { + auto & p = model->lookup_parameters_list()[idx]; + update_rule(scale, gscale, {&p->all_values, &p->all_grads, &lm[idx].all_h, &lv[idx].all_h}); +} +void AdamTrainer::alloc_impl() { + m = allocate_shadow_parameters(*model); + lm = allocate_shadow_lookup_parameters(*model); + v = allocate_shadow_parameters(*model); + lv = allocate_shadow_lookup_parameters(*model); +} +#endif + +#ifndef __CUDACC__ +// BOOST_CLASS_EXPORT_IMPLEMENT(dynet::SimpleSGDTrainer) +// BOOST_CLASS_EXPORT_IMPLEMENT(dynet::MomentumSGDTrainer) +// BOOST_CLASS_EXPORT_IMPLEMENT(dynet::AdagradTrainer) +// BOOST_CLASS_EXPORT_IMPLEMENT(dynet::AdadeltaTrainer) +// BOOST_CLASS_EXPORT_IMPLEMENT(dynet::RMSPropTrainer) +// BOOST_CLASS_EXPORT_IMPLEMENT(dynet::AdamTrainer) + +DYNET_SERIALIZE_COMMIT(Trainer, DYNET_SERIALIZE_DEFINE(eta0, eta, eta_decay, epoch, + clipping_enabled, clip_threshold, clips, updates, + aux_allocated, model)) +DYNET_SERIALIZE_IMPL(Trainer) + +DYNET_SERIALIZE_COMMIT(SimpleSGDTrainer, DYNET_SERIALIZE_DERIVED_EQ_DEFINE(Trainer)) +DYNET_SERIALIZE_IMPL(SimpleSGDTrainer) + +DYNET_SERIALIZE_COMMIT(CyclicalSGDTrainer, DYNET_SERIALIZE_DERIVED_EQ_DEFINE(Trainer)) +DYNET_SERIALIZE_IMPL(CyclicalSGDTrainer) + +DYNET_SERIALIZE_COMMIT(MomentumSGDTrainer, DYNET_SERIALIZE_DERIVED_DEFINE(Trainer, momentum, vp, vlp)) +DYNET_SERIALIZE_IMPL(MomentumSGDTrainer) + +DYNET_SERIALIZE_COMMIT(AdagradTrainer, DYNET_SERIALIZE_DERIVED_DEFINE(Trainer, epsilon, vp, vlp)) +DYNET_SERIALIZE_IMPL(AdagradTrainer) + +DYNET_SERIALIZE_COMMIT(AdadeltaTrainer, DYNET_SERIALIZE_DERIVED_DEFINE(Trainer, epsilon, rho, hg, hlg, hd, hld)) 
+DYNET_SERIALIZE_IMPL(AdadeltaTrainer) + +DYNET_SERIALIZE_COMMIT(RMSPropTrainer, DYNET_SERIALIZE_DERIVED_DEFINE(Trainer, epsilon, rho, hmsg, hlmsg)) +DYNET_SERIALIZE_IMPL(RMSPropTrainer) + +DYNET_SERIALIZE_COMMIT(AdamTrainer, DYNET_SERIALIZE_DERIVED_DEFINE(Trainer, beta_1, beta_2, epsilon, m, lm, v, lv)) +DYNET_SERIALIZE_IMPL(AdamTrainer) + +#endif + +} // namespace dynet + +#ifndef __CUDACC__ +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::SimpleSGDTrainer) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::CyclicalSGDTrainer) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::MomentumSGDTrainer) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::AdagradTrainer) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::AdadeltaTrainer) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::RMSPropTrainer) +BOOST_CLASS_EXPORT_IMPLEMENT(dynet::AdamTrainer) +#endif diff --git a/thirdparty/dynet/dynet/training.h b/thirdparty/dynet/dynet/training.h new file mode 100644 index 000000000..f75d3488d --- /dev/null +++ b/thirdparty/dynet/dynet/training.h @@ -0,0 +1,447 @@ +/** + * \file training.h + * \defgroup optimizers + * \brief Training procedures + * + * The various trainers are defined here. + * All trainers are structures inheriting from the `Trainer` struct. 
+ * + * + */ + +#ifndef DYNET_TRAINING_H_ +#define DYNET_TRAINING_H_ + +#include + +#include + +#include "dynet/model.h" +#include "dynet/shadow-params.h" +#include "dynet/io-macros.h" + +#define DYNET_TRAINER_DEFINE_DEV_IMPL() \ + void update_params(real scale, real gscale, size_t idx) override; \ + void update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) override; \ + void update_lookup_params(real scale, real gscale, size_t idx) override; \ + template \ + void update_rule_dev(const MyDevice & dev, real scale, real gscale, const std::vector & values); \ + void update_rule(real scale, real gscale, const std::vector & values) override; + +namespace dynet { + +/** + * \ingroup optimizers + * + * \struct Trainer + * \brief General trainer struct + * + */ +struct Trainer { + /** + * \brief General constructor for a Trainer + * + * \param m Model to be trained + * \param e0 Initial learning rate + * \param edecay Learning rate decay + */ + explicit Trainer(Model& m, real e0, real edecay = 0.0) : + eta0(e0), eta(e0), eta_decay(edecay), epoch(), clipping_enabled(true), clip_threshold(5), + clips(), updates(), clips_since_status(), updates_since_status(), sparse_updates_enabled(true), aux_allocated(false), model(&m) {} + virtual ~Trainer(); + + /** + * \brief Update parameters + * \details Update the parameters according to the appropriate update rule + * + * \param scale The scaling factor for the gradients + */ + void update(real scale = 1.0); + + /** + * \brief Update subset of parameters + * \details Update some but not all of the parameters included in the model. This + * is the update_subset() function in the Python bindings. The + * parameters to be updated are specified by index, which can be found + * for Parameter and LookupParameter objects through the "index" variable + * (or the get_index() function in the Python bindings). 
+ * + * \param updated_params The parameter indices to be updated + * \param updated_lookup_params The lookup parameter indices to be updated + * \param scale The scaling factor for the gradients + */ + void update(const std::vector & updated_params, const std::vector & updated_lookup_params, real scale = 1.0); + + void update_epoch(real r = 1) { + epoch += r; + eta = eta0 / (1 + epoch * eta_decay); + } + + /** + * \brief Clip gradient + * \details If clipping is enabled and the gradient is too big, return the amount to + * scale the gradient by (otherwise 1) + * + * + * \param scale The clipping limit + * \return The appropriate scaling factor + */ + float clip_gradients(real scale); + + // TODO: This is unprotected temporarily until there is a better solution + // for serializing the weight decay when saving models + // Rescale all the parameters handled by this model + void rescale_and_reset_weight_decay(); + + // learning rates + real eta0; + real eta; + real eta_decay; + real epoch; + + // clipping + bool clipping_enabled; + real clip_threshold; + real clips; + real updates; + // the number of clips and status since the last print + real clips_since_status; + real updates_since_status; + + /** + * \brief Whether to perform sparse updates + * \details DyNet trainers support two types of updates for lookup parameters, + * sparse and dense. Sparse updates are the default. They have the + * potential to be faster, as they only touch the parameters that have + * non-zero gradients. However, they may not always be faster (particulary + * on GPU with mini-batch training), and are not precisely numerically + * correct for some update rules such as MomentumTrainer and AdamTrainer. + * Thus, if you set this variable to false, the trainer will perform dense + * updates and be precisely correct, and maybe faster sometimes. 
+ */ + bool sparse_updates_enabled; + + bool aux_allocated; + + void status() { + std::cerr << "[epoch=" << epoch << " eta=" << eta << " clips=" << clips_since_status << " updates=" << updates_since_status << "] "; + updates_since_status = clips_since_status = 0; + } + + Model* model; // parameters and gradients live here + +protected: + Trainer() {} + virtual void alloc_impl() { } + /** + * \brief The actual rule to update the parameters + * + * \param scale Scale of the update (i.e. learning rate) + * \param gscale Gradient scale based on clipping + * \param values Values specific to the particular update rule being implemented + */ + virtual void update_rule(real scale, real gscale, const std::vector & values) = 0; + /** + * \brief Parameter update function + * + * \param scale Scale of the update (i.e. learning rate) + * \param gscale Gradient scale based on clipping + * \param idx Index of the parameter + */ + virtual void update_params(real scale, real gscale, size_t idx) = 0; + /** + * \brief Sparse lookup parameter update function + * + * \param scale Scale of the update (i.e. learning rate) + * \param gscale Gradient scale based on clipping + * \param idx Index of the lookup parameter object + * \param lidx Index of the specific entry within the lookup parameter object + */ + virtual void update_lookup_params(real scale, real gscale, size_t idx, size_t lidx) = 0; + /** + * \brief Dense lookup parameter update function + * + * \param scale Scale of the update (i.e. learning rate) + * \param gscale Gradient scale based on clipping + * \param idx Index of the lookup parameter object + */ + virtual void update_lookup_params(real scale, real gscale, size_t idx) = 0; + +private: + DYNET_SERIALIZE_DECLARE() +}; + +/** + * \ingroup optimizers + * + * \brief Stochastic gradient descent trainer + * \details This trainer performs stochastic gradient descent, the goto optimization procedure for neural networks. 
+ * In the standard setting, the learning rate at epoch \f$t\f$ is \f$\eta_t=\frac{\eta_0}{1+\eta_{\mathrm{decay}}t}\f$
+ *
+ * Reference : [reference needed](ref.need.ed)
+ *
+ */
+struct SimpleSGDTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param e0 Initial learning rate
+   * \param edecay Learning rate decay parameter.
+   */
+  explicit SimpleSGDTrainer(Model& m, real e0 = 0.1, real edecay = 0.0) : Trainer(m, e0, edecay) {}
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+private:
+  // Model-less constructor; not callable from outside the class
+  SimpleSGDTrainer() {}
+  DYNET_SERIALIZE_DECLARE()
+};
+
+/**
+ * \ingroup optimizers
+ *
+ * \brief Cyclical learning rate SGD
+ * \details This trainer performs stochastic gradient descent with a cyclical learning rate as proposed in [Smith, 2015](https://arxiv.org/abs/1506.01186).
+ *
+ * This uses a triangular function with optional exponential decay.
+ *
+ * More specifically, at each update, the learning rate \f$\eta\f$ is updated according to :
+ *
+ * \f$
+ * \begin{split}
+ * \text{cycle} &= \left\lfloor 1 + \frac{\texttt{it}}{2 \times\texttt{step_size}} \right\rfloor\\
+ * x &= \left\vert \frac{\texttt{it}}{\texttt{step_size}} - 2 \times \text{cycle} + 1\right\vert\\
+ * \eta &= \eta_{\text{min}} + (\eta_{\text{max}} - \eta_{\text{min}}) \times \max(0, 1 - x) \times \gamma^{\texttt{it}}\\
+ * \end{split}
+ * \f$
+ *
+ *
+ * Reference : [Cyclical Learning Rates for Training Neural Networks](https://arxiv.org/abs/1506.01186)
+ *
+ */
+struct CyclicalSGDTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param e0_min Lower learning rate
+   * \param e0_max Upper learning rate
+   * \param step_size Half-period of the triangular function in number of iterations (__not__ epochs): one full cycle takes 2 x step_size iterations. According to the original paper, this should be set around (2-8) x (training iterations in epoch)
+   * \param gamma Learning rate upper bound decay parameter, applied as \f$\gamma^{\texttt{it}}\f$.
+   *              NOTE(review): with the default of 0.0 this factor is 0 for every it > 0, so
+   *              cyclic_update_eta() leaves eta at e0_min; pass 1.0 for an undecayed triangular schedule.
+   * \param edecay Learning rate decay parameter. Ideally you shouldn't use this with cyclical learning rate since decay is already handled by \f$\gamma\f$
+   */
+  explicit CyclicalSGDTrainer(Model& m, float e0_min = 0.01, float e0_max = 0.1, float step_size = 2000, float gamma = 0.0, float edecay = 0.0) : Trainer(m, e0_min, edecay), e_min(e0_min), e_max(e0_max), step_size(step_size), gamma(gamma), it(0) {}
+  /**
+   * \brief Update parameters, then advance the cyclical schedule
+   * \details Runs Trainer::update(scale) with the current eta and afterwards
+   *          recomputes eta for the next call.
+   *
+   * \param scale The scaling factor for the gradients
+   */
+  void update(real scale = 1.0) { Trainer::update(scale);cyclic_update_eta();}
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+  // Recompute eta from the iteration counter (formula in the struct
+  // documentation), then increment the counter.
+  void cyclic_update_eta() {
+    float cycle = std::floor(1 + ((float) it) / (2 * step_size));
+    float x = std::abs( ((float) it) / step_size - 2 * cycle + 1);
+    // (1 - x) > 0 implements max(0, 1 - x); otherwise eta falls back to e_min
+    eta = e_min + ((1 - x) > 0 ? (e_max - e_min) * (1 - x) * (real)std::pow(gamma, it) : 0);
+    it++;
+  }
+  float e_min;      // lower learning rate
+  float e_max;      // upper learning rate
+  float step_size;  // iterations per half cycle
+  float gamma;      // per-iteration decay of the (e_max - e_min) amplitude
+  unsigned it;      // number of cyclic_update_eta() calls so far
+private:
+  // Model-less constructor; not callable from outside the class
+  CyclicalSGDTrainer() {}
+  DYNET_SERIALIZE_DECLARE()
+};
+
+
+/**
+ * \ingroup optimizers
+ *
+ * \brief Stochastic gradient descent with momentum
+ * \details This is a modified version of the SGD algorithm with momentum to stabilize the gradient trajectory.
+ * The modified gradient is \f$\theta_{t+1}=\mu\theta_{t}+\nabla_{t+1}\f$ where \f$\mu\f$ is the momentum.
+ *
+ * Reference : [reference needed](ref.need.ed)
+ *
+ */
+struct MomentumSGDTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param e0 Initial learning rate
+   * \param mom Momentum
+   * \param edecay Learning rate decay parameter
+   */
+  explicit MomentumSGDTrainer(Model& m, real e0 = 0.01, real mom = 0.9, real edecay = 0.0) :
+    Trainer(m, e0, edecay), momentum(mom) {}
+
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+  // Allocates vp and vlp for the model
+  virtual void alloc_impl() override;
+
+  real momentum;
+
+  // the following represent the current velocity
+  // (vp: regular parameters, vlp: lookup parameters)
+  std::vector vp;
+  std::vector vlp;
+  //std::unordered_map vp;
+  //std::unordered_map> vl;
+private:
+  // Model-less constructor; not callable from outside the class
+  MomentumSGDTrainer() {}
+  DYNET_SERIALIZE_DECLARE()
+};
+
+/**
+ * \ingroup optimizers
+ *
+ * \brief Adagrad optimizer
+ * \details The adagrad algorithm assigns a different learning rate to each parameter according to the following formula :
+ * \f$\delta_\theta^{(t)}=-\frac{\eta_0}{\sqrt{\epsilon+\sum_{i=0}^{t}(\nabla_\theta^{(i)})^2}}\nabla_\theta^{(t)}\f$
+ *
+ * Reference : [Duchi et al., 2011](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+ *
+ */
+struct AdagradTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param e0 Initial learning rate
+   * \param eps Bias parameter \f$\epsilon\f$ in the adagrad formula
+   * \param edecay Learning rate decay parameter
+   */
+  explicit AdagradTrainer(Model& m, real e0 = 0.1, real eps = 1e-20, real edecay = 0.0) :
+    Trainer(m, e0, edecay), epsilon(eps) {}
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+  // Allocates vp and vlp for the model
+  virtual void alloc_impl() override;
+
+  real epsilon;
+  // running sums of squared gradients
+  // (vp: regular parameters, vlp: lookup parameters)
+  std::vector vp;
+  std::vector vlp;
+private:
+  // Model-less constructor; not callable from outside the class
+  AdagradTrainer() {}
+  DYNET_SERIALIZE_DECLARE()
+};
+
+/**
+ * \ingroup optimizers
+ *
+ * \brief AdaDelta optimizer
+ * \details The AdaDelta optimizer is a variant of Adagrad where
+ * \f$\frac{\eta_0}{\sqrt{\epsilon+\sum_{i=0}^{t-1}(\nabla_\theta^{(i)})^2}}\f$ is replaced by
+ *
\f$\frac{\sqrt{\epsilon+\sum_{i=0}^{t-1}\rho^{t-i-1}(1-\rho)(\delta_\theta^{(i)})^2}}{\sqrt{\epsilon+\sum_{i=0}^{t-1}(\nabla_\theta^{(i)})^2}}\f$,
+ * hence eliminating the need for an initial learning rate.
+ *
+ * Reference : [ADADELTA: An Adaptive Learning Rate Method](https://arxiv.org/pdf/1212.5701v1)
+ *
+ */
+struct AdadeltaTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param eps Bias parameter \f$\epsilon\f$ added under both square roots of the formula above
+   * \param rho Decay rate of the moving averages (used for both the squared gradients and the squared updates)
+   * \param edecay Learning rate decay parameter. The base learning rate is fixed to 1.0 here and
+   *               the AdaDelta update rule in training.cc does not read eta, so this has no
+   *               effect on the step that is applied.
+   */
+  explicit AdadeltaTrainer(Model& m, real eps = 1e-6, real rho = 0.95, real edecay = 0.0) :
+    Trainer(m, 1.0, edecay), epsilon(eps), rho(rho) {}
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+  // Allocates hg, hlg, hd and hld for the model
+  virtual void alloc_impl() override;
+
+  real epsilon;
+  real rho;
+  std::vector hg; // History of gradients (moving average of squared gradients), regular parameters
+  std::vector hlg; // same, lookup parameters
+  std::vector hd; // History of deltas (moving average of squared updates), regular parameters
+  std::vector hld; // same, lookup parameters
+private:
+  // Model-less constructor; not callable from outside the class
+  AdadeltaTrainer() {}
+  DYNET_SERIALIZE_DECLARE()
+};
+
+/**
+ * \ingroup optimizers
+ *
+ * \brief RMSProp optimizer
+ * \details The RMSProp optimizer is a variant of Adagrad where the squared sum of previous gradients is replaced with a moving average with parameter \f$\rho\f$.
+ *
+ * Reference : [reference needed](ref.need.ed)
+ *
+ */
+struct RMSPropTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param e0 Initial learning rate
+   * \param eps Bias parameter \f$\epsilon\f$ added to the moving average before the square root
+   * \param rho Update parameter for the moving average (with `rho = 0` only the current squared gradient is kept, which is not Adagrad's running sum)
+   * \param edecay Learning rate decay parameter
+   */
+  explicit RMSPropTrainer(Model& m, real e0 = 0.001, real eps = 1e-08, real rho = 0.9, real edecay = 0.0) :
+    Trainer(m, e0, edecay), epsilon(eps), rho(rho) {}
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+  // Allocates hmsg and hlmsg for the model
+  virtual void alloc_impl() override;
+
+  real epsilon;
+  real rho;
+  std::vector hmsg; // Moving average of squared gradients, regular parameters
+  std::vector hlmsg; // same, lookup parameters
+private:
+  // Model-less constructor; not callable from outside the class
+  RMSPropTrainer() {}
+  DYNET_SERIALIZE_DECLARE()
+};
+
+/**
+ * \ingroup optimizers
+ *
+ * \brief Adam optimizer
+ * \details The Adam optimizer is similar to RMSProp but uses unbiased estimates
+ *          of the first and second moments of the gradient
+ *
+ * Reference : [Adam: A Method for Stochastic Optimization](https://arxiv.org/pdf/1412.6980v8)
+ *
+ */
+struct AdamTrainer : public Trainer {
+  /**
+   * \brief Constructor
+   *
+   * \param m Model to be trained
+   * \param e0 Initial learning rate
+   * \param beta_1 Moving average parameter for the mean
+   * \param beta_2 Moving average parameter for the variance
+   * \param eps Bias parameter \f$\epsilon\f$
+   * \param edecay Learning rate decay parameter
+   */
+  explicit AdamTrainer(Model& m, float e0 = 0.001, float beta_1 = 0.9, float beta_2 = 0.999, float eps = 1e-8, real edecay = 0.0) :
+    Trainer(m, e0, edecay), beta_1(beta_1), beta_2(beta_2), epsilon(eps) {}
+
+protected:
+  DYNET_TRAINER_DEFINE_DEV_IMPL()
+  // Allocates m, lm, v and lv for the model
+  virtual void alloc_impl() override;
+
+  float beta_1;
+  float beta_2;
+  float epsilon;
+  std::vector m; // Moving average of the gradient (mean), regular parameters
+  std::vector lm; // same, lookup parameters
+  std::vector v; // Moving average of the squared gradient (variance), regular parameters
+  std::vector lv; // same, lookup parameters
+private:
+  // Model-less constructor; not callable from outside the class
+  AdamTrainer() {}
+
DYNET_SERIALIZE_DECLARE() +}; + +} // namespace dynet + +BOOST_CLASS_EXPORT_KEY(dynet::SimpleSGDTrainer) +BOOST_CLASS_EXPORT_KEY(dynet::CyclicalSGDTrainer) +BOOST_CLASS_EXPORT_KEY(dynet::MomentumSGDTrainer) +BOOST_CLASS_EXPORT_KEY(dynet::AdagradTrainer) +BOOST_CLASS_EXPORT_KEY(dynet::AdadeltaTrainer) +BOOST_CLASS_EXPORT_KEY(dynet::RMSPropTrainer) +BOOST_CLASS_EXPORT_KEY(dynet::AdamTrainer) + +#endif diff --git a/thirdparty/dynet/dynet/treelstm.cc b/thirdparty/dynet/dynet/treelstm.cc new file mode 100644 index 000000000..05d875562 --- /dev/null +++ b/thirdparty/dynet/dynet/treelstm.cc @@ -0,0 +1,377 @@ +#include +#include +#include + +#include "dynet/nodes.h" +#include "dynet/treelstm.h" + +using namespace std; +using namespace dynet; +using namespace dynet::expr; + +BOOST_CLASS_EXPORT_IMPLEMENT(TreeLSTMBuilder) +BOOST_CLASS_EXPORT_IMPLEMENT(NaryTreeLSTMBuilder) +BOOST_CLASS_EXPORT_IMPLEMENT(UnidirectionalTreeLSTMBuilder) +BOOST_CLASS_EXPORT_IMPLEMENT(BidirectionalTreeLSTMBuilder) + +enum { X2I, BI, X2F, BF, X2O, BO, X2C, BC }; +enum { H2I, H2F, H2O, H2C, C2I, C2F, C2O }; + +Expression TreeLSTMBuilder::add_input_impl(int prev, const Expression& x) { throw std::runtime_error("add_input_impl() not a valid function for TreeLSTMBuilder"); } +Expression TreeLSTMBuilder::back() const { throw std::runtime_error("back() not a valid function for TreeLSTMBuilder"); } +std::vector TreeLSTMBuilder::final_h() const { throw std::runtime_error("final_h() not a valid function for TreeLSTMBuilder"); } +std::vector TreeLSTMBuilder::final_s() const { throw std::runtime_error("final_s() not a valid function for TreeLSTMBuilder"); } +unsigned TreeLSTMBuilder::num_h0_components() const { throw std::runtime_error("num_h0_components() not a valid function for TreeLSTMBuilder"); } +void TreeLSTMBuilder::copy(const RNNBuilder&) { throw std::runtime_error("copy() not a valid function for TreeLSTMBuilder"); } + +DYNET_SERIALIZE_COMMIT(TreeLSTMBuilder, 
DYNET_SERIALIZE_DERIVED_EQ_DEFINE(RNNBuilder)) +DYNET_SERIALIZE_IMPL(TreeLSTMBuilder); + +// See "Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks" +// by Tai, Nary, and Manning (2015), section 3.2, for details on this model. +// http://arxiv.org/pdf/1503.00075v3.pdf +NaryTreeLSTMBuilder::NaryTreeLSTMBuilder(unsigned N, + unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) : layers(layers), N(N), cg(nullptr) { + unsigned layer_input_dim = input_dim; + for (unsigned i = 0; i < layers; ++i) { + // i + Parameter p_x2i = model.add_parameters({hidden_dim, layer_input_dim}); + LookupParameter p_h2i = model.add_lookup_parameters(N, {hidden_dim, hidden_dim}); + LookupParameter p_c2i = model.add_lookup_parameters(N, {hidden_dim, hidden_dim}); + Parameter p_bi = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // f + Parameter p_x2f = model.add_parameters({hidden_dim, layer_input_dim}); + LookupParameter p_h2f = model.add_lookup_parameters(N*N, {hidden_dim, hidden_dim}); + LookupParameter p_c2f = model.add_lookup_parameters(N*N, {hidden_dim, hidden_dim}); + Parameter p_bf = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // o + Parameter p_x2o = model.add_parameters({hidden_dim, layer_input_dim}); + LookupParameter p_h2o = model.add_lookup_parameters(N, {hidden_dim, hidden_dim}); + LookupParameter p_c2o = model.add_lookup_parameters(N, {hidden_dim, hidden_dim}); + Parameter p_bo = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + // c (a.k.a. 
u) + Parameter p_x2c = model.add_parameters({hidden_dim, layer_input_dim}); + LookupParameter p_h2c = model.add_lookup_parameters(N, {hidden_dim, hidden_dim}); + Parameter p_bc = model.add_parameters({hidden_dim}, ParameterInitConst(0.f)); + + layer_input_dim = hidden_dim; // output (hidden) from 1st layer is input to next + + vector ps = {p_x2i, p_bi, p_x2f, p_bf, p_x2o, p_bo, p_x2c, p_bc}; + vector lps = {p_h2i, p_h2f, p_h2o, p_h2c, p_c2i, p_c2f, p_c2o}; + params.push_back(ps); + lparams.push_back(lps); + } // layers +} + +void NaryTreeLSTMBuilder::new_graph_impl(ComputationGraph& cg) { + this->cg = &cg; + param_vars.clear(); + lparam_vars.clear(); + param_vars.reserve(layers); + lparam_vars.reserve(layers); + + for (unsigned i = 0; i < layers; ++i){ + auto& p = params[i]; + auto& lp = lparams[i]; + + //i + Expression i_x2i = parameter(cg, p[X2I]); + Expression i_bi = parameter(cg, p[BI]); + //f + Expression i_x2f = parameter(cg, p[X2F]); + Expression i_bf = parameter(cg, p[BF]); + //o + Expression i_x2o = parameter(cg, p[X2O]); + Expression i_bo = parameter(cg, p[BO]); + //c + Expression i_x2c = parameter(cg, p[X2C]); + Expression i_bc = parameter(cg, p[BC]); + + vector vars = {i_x2i, i_bi, i_x2f, i_bf, i_x2o, i_bo, i_x2c, i_bc}; + param_vars.push_back(vars); + + DYNET_ASSERT(lp.size() == C2O + 1, "Dimension mismatch in TreeLSTM"); + vector> lvars(lp.size()); + for (unsigned p_type = H2I; p_type <= C2O; p_type++) { + LookupParameter p = lp[p_type]; + vector vals(p.get()->values.size()); + for (unsigned k = 0; k < p.get()->values.size(); ++k) { + //vals[k] = lookup(cg, p, k); + vals[k].i = 0; + } + lvars[p_type] = vals; + } + lparam_vars.push_back(lvars); + } +} + +Expression NaryTreeLSTMBuilder::Lookup(unsigned layer, unsigned p_type, unsigned value) { + if (lparam_vars[layer][p_type][value].i == 0) { + LookupParameter p = lparams[layer][p_type]; + lparam_vars[layer][p_type][value] = lookup(*cg, p, value); + } + return lparam_vars[layer][p_type][value]; +} + +// 
layout: 0..layers = c +// layers+1..2*layers = h +void NaryTreeLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + c.clear(); + if (hinit.size() > 0) { + DYNET_ARG_CHECK(layers*2 == hinit.size(), + "Incorrectly sized initialization in TreeLSTM (" << hinit.size() << "). " + "Must be twice the number of layers (which is " << layers<< ")"); + h0.resize(layers); + c0.resize(layers); + for (unsigned i = 0; i < layers; ++i) { + c0[i] = hinit[i]; + h0[i] = hinit[i + layers]; + } + has_initial_state = true; + } else { + has_initial_state = false; + } +} + +Expression NaryTreeLSTMBuilder::add_input(int id, vector children, const Expression& x) { + DYNET_ASSERT(id >= 0 && h.size() == (unsigned)id, "Failed dimension check in TreeLSTMBuilder"); + DYNET_ASSERT(id >= 0 && c.size() == (unsigned)id, "Failed dimension check in TreeLSTMBuilder"); + h.push_back(vector(layers)); + c.push_back(vector(layers)); + vector& ht = h.back(); + vector& ct = c.back(); + + Expression in = x; + for (unsigned i = 0; i < layers; ++i) { + const vector& vars = param_vars[i]; + vector i_h_children, i_c_children; + i_h_children.reserve(children.size() > 1 ? children.size() : 1); + i_c_children.reserve(children.size() > 1 ? children.size() : 1); + + bool has_prev_state = (children.size() > 0 || has_initial_state); + if (children.size() == 0) { + i_h_children.push_back(Expression()); + i_c_children.push_back(Expression()); + if (has_initial_state) { + // intial value for h and c at timestep 0 in layer i + // defaults to zero matrix input if not set in add_parameter_edges + i_h_children[0] = h0[i]; + i_c_children[0] = c0[i]; + } + } + else { // t > 0 + for (int child : children) { + i_h_children.push_back(h[child][i]); + i_c_children.push_back(c[child][i]); + } + } + + // input + Expression i_ait; + if (has_prev_state) { + vector xs = {vars[BI], vars[X2I], in}; + xs.reserve(4 * children.size() + 3); + for (unsigned j = 0; j < children.size(); ++j) { + unsigned ej = (j < N) ? 
j : N - 1; + xs.push_back(Lookup(i, H2I, ej)); + xs.push_back(i_h_children[j]); + xs.push_back(Lookup(i, C2I, ej)); + xs.push_back(i_c_children[j]); + } + DYNET_ASSERT(xs.size() == 4 * children.size() + 3, "Failed dimension check in TreeLSTMBuilder"); + i_ait = affine_transform(xs); + } + else + i_ait = affine_transform({vars[BI], vars[X2I], in}); + Expression i_it = logistic(i_ait); + + // forget + vector i_ft; + for (unsigned k = 0; k < children.size(); ++k) { + unsigned ek = (k < N) ? k : N - 1; + Expression i_aft; + if (has_prev_state) { + vector xs = {vars[BF], vars[X2F], in}; + xs.reserve(4 * children.size() + 3); + for (unsigned j = 0; j < children.size(); ++j) { + unsigned ej = (j < N) ? j : N - 1; + xs.push_back(Lookup(i, H2F, ej * N + ek)); + xs.push_back(i_h_children[j]); + xs.push_back(Lookup(i, C2F, ej * N + ek)); + xs.push_back(i_c_children[j]); + } + DYNET_ASSERT(xs.size() == 4 * children.size() + 3, "Failed dimension check in TreeLSTMBuilder"); + i_aft = affine_transform(xs); + } + else + i_aft = affine_transform({vars[BF], vars[X2F], in}); + i_ft.push_back(logistic(i_aft + 1.f)); + } + + // write memory cell + Expression i_awt; + if (has_prev_state) { + vector xs = {vars[BC], vars[X2C], in}; + // This is the one and only place that should *not* condition on i_c_children + // This should condition only on x (a.k.a. in), the bias (vars[BC]) and i_h_children + xs.reserve(2 * children.size() + 3); + for (unsigned j = 0; j < children.size(); ++j) { + unsigned ej = (j < N) ? 
j : N - 1; + xs.push_back(Lookup(i, H2C, ej)); + xs.push_back(i_h_children[j]); + } + DYNET_ASSERT(xs.size() == 2 * children.size() + 3, "Failed dimension check in TreeLSTMBuilder"); + i_awt = affine_transform(xs); + } + else + i_awt = affine_transform({vars[BC], vars[X2C], in}); + Expression i_wt = tanh(i_awt); + + // compute new cell value + if (has_prev_state) { + Expression i_nwt = cmult(i_it, i_wt); + vector i_crts(children.size()); + for (unsigned j = 0; j < children.size(); ++j) { + i_crts[j] = cmult(i_ft[j], i_c_children[j]); + } + Expression i_crt = sum(i_crts); + ct[i] = i_crt + i_nwt; + } + else { + ct[i] = cmult(i_it, i_wt); + } + + // output + Expression i_aot; + if (has_prev_state) { + vector xs = {vars[BO], vars[X2O], in}; + xs.reserve(4 * children.size() + 3); + for (unsigned j = 0; j < children.size(); ++j) { + unsigned ej = (j < N) ? j : N - 1; + xs.push_back(Lookup(i, H2O, ej)); + xs.push_back(i_h_children[j]); + xs.push_back(Lookup(i, C2O, ej)); + xs.push_back(i_c_children[j]); + } + DYNET_ASSERT(xs.size() == 4 * children.size() + 3, "Failed dimension check in TreeLSTMBuilder"); + i_aot = affine_transform(xs); + } + else + i_aot = affine_transform({vars[BO], vars[X2O], in}); + Expression i_ot = logistic(i_aot); + + // Compute new h value + Expression ph_t = tanh(ct[i]); + in = ht[i] = cmult(i_ot, ph_t); + } + return ht.back(); +} + +void NaryTreeLSTMBuilder::copy(const RNNBuilder & rnn) { + const NaryTreeLSTMBuilder & rnn_treelstm = (const NaryTreeLSTMBuilder&)rnn; + DYNET_ASSERT(params.size() == rnn_treelstm.params.size(), "Failed dimension check in TreeLSTMBuilder"); + for(size_t i = 0; i < params.size(); ++i) { + for(size_t j = 0; j < params[i].size(); ++j) { + params[i][j] = rnn_treelstm.params[i][j]; + } + } +} + +DYNET_SERIALIZE_COMMIT(NaryTreeLSTMBuilder, DYNET_SERIALIZE_DERIVED_DEFINE(TreeLSTMBuilder, params, lparams, layers, N)) +DYNET_SERIALIZE_IMPL(NaryTreeLSTMBuilder); + 
+UnidirectionalTreeLSTMBuilder::UnidirectionalTreeLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) { + node_builder = LSTMBuilder(layers, input_dim, hidden_dim, model); +} + +void UnidirectionalTreeLSTMBuilder::new_graph_impl(ComputationGraph& cg) { + node_builder.new_graph(cg); +} + +// layout: 0..layers = c +// layers+1..2*layers = h +void UnidirectionalTreeLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + node_builder.start_new_sequence(hinit); +} + +Expression UnidirectionalTreeLSTMBuilder::add_input(int id, vector children, const Expression& x) { + DYNET_ASSERT(id >= 0 && h.size() == (unsigned)id, "Failed dimension check in TreeLSTMBuilder"); + + RNNPointer prev = (RNNPointer)(-1); + Expression embedding = node_builder.add_input(prev, x); + prev = node_builder.state(); + + for (unsigned child : children) { + embedding = node_builder.add_input(prev, h[child]); + prev = node_builder.state(); + } + h.push_back(embedding); + return embedding; +} + +DYNET_SERIALIZE_COMMIT(UnidirectionalTreeLSTMBuilder, DYNET_SERIALIZE_DERIVED_DEFINE(TreeLSTMBuilder, node_builder)) +DYNET_SERIALIZE_IMPL(UnidirectionalTreeLSTMBuilder); + +BidirectionalTreeLSTMBuilder::BidirectionalTreeLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model) { + DYNET_ASSERT(hidden_dim % 2 == 0, "Failed dimension check in TreeLSTMBuilder"); + fwd_node_builder = LSTMBuilder(layers, input_dim, hidden_dim / 2, model); + rev_node_builder = LSTMBuilder(layers, input_dim, hidden_dim / 2, model); +} + +void BidirectionalTreeLSTMBuilder::new_graph_impl(ComputationGraph& cg) { + fwd_node_builder.new_graph(cg); + rev_node_builder.new_graph(cg); +} + +// layout: 0..layers = c +// layers+1..2*layers = h +void BidirectionalTreeLSTMBuilder::start_new_sequence_impl(const vector& hinit) { + h.clear(); + fwd_node_builder.start_new_sequence(hinit); + rev_node_builder.start_new_sequence(hinit); +} + +Expression 
BidirectionalTreeLSTMBuilder::add_input(int id, vector children, const Expression& x) { + DYNET_ASSERT(id >= 0 && h.size() == (unsigned)id, "Failed dimension check in TreeLSTMBuilder"); + + RNNPointer prev = (RNNPointer)(-1); + Expression fwd_embedding = fwd_node_builder.add_input(prev, x); + prev = fwd_node_builder.state(); + for (unsigned child : children) { + fwd_embedding = fwd_node_builder.add_input(prev, h[child]); + prev = fwd_node_builder.state(); + } + + prev = (RNNPointer)(-1); + Expression rev_embedding = rev_node_builder.add_input(prev, x); + prev = rev_node_builder.state(); + for (unsigned i = children.size(); i-- > 0;) { + unsigned child = children[i]; + rev_embedding = rev_node_builder.add_input(prev, h[child]); + prev = rev_node_builder.state(); + } + + Expression embedding = concatenate({fwd_embedding, rev_embedding}); + h.push_back(embedding); + + return embedding; +} + +Expression BidirectionalTreeLSTMBuilder::set_h_impl(int prev, const vector& h_new) { throw std::runtime_error("set_h() not a valid function for BidirectionalTreeLSTMBuilder"); } + +DYNET_SERIALIZE_COMMIT(BidirectionalTreeLSTMBuilder, DYNET_SERIALIZE_DERIVED_DEFINE(TreeLSTMBuilder, fwd_node_builder, rev_node_builder)) +DYNET_SERIALIZE_IMPL(BidirectionalTreeLSTMBuilder); diff --git a/thirdparty/dynet/dynet/treelstm.h b/thirdparty/dynet/dynet/treelstm.h new file mode 100644 index 000000000..eb2269db2 --- /dev/null +++ b/thirdparty/dynet/dynet/treelstm.h @@ -0,0 +1,122 @@ +#pragma once +#include +#include +#include +#include +#include "dynet/dynet.h" +#include "dynet/rnn.h" +#include "dynet/expr.h" +#include "dynet/lstm.h" +#include "dynet/io-macros.h" + +using namespace dynet::expr; + +namespace dynet { + +struct TreeLSTMBuilder : public RNNBuilder { +public: + virtual Expression back() const override; + virtual std::vector final_h() const override; + virtual std::vector final_s() const override; + virtual unsigned num_h0_components() const override; + virtual void copy(const 
RNNBuilder & params) override; + virtual Expression add_input(int id, std::vector children, const Expression& x) = 0; + std::vector get_h(RNNPointer i) const override { throw std::runtime_error("get_h() not a valid function for TreeLSTMBuilder"); } + std::vector get_s(RNNPointer i) const override { throw std::runtime_error("get_s() not a valid function for TreeLSTMBuilder"); } + Expression set_s_impl(int prev, const std::vector& s_new) override { throw std::runtime_error("set_s_impl() not a valid function for TreeLSTMBuilder"); } + protected: + virtual void new_graph_impl(ComputationGraph& cg) override = 0; + virtual void start_new_sequence_impl(const std::vector& h0) override = 0; + virtual Expression add_input_impl(int prev, const Expression& x) override; + +private: + DYNET_SERIALIZE_DECLARE() +}; + +struct NaryTreeLSTMBuilder : public TreeLSTMBuilder { + NaryTreeLSTMBuilder() = default; + explicit NaryTreeLSTMBuilder(unsigned N, //Max branching factor + unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model); + + Expression add_input(int id, std::vector children, const Expression& x) override; + void copy(const RNNBuilder & params) override; + protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression Lookup(unsigned layer, unsigned p_type, unsigned value); + + public: + // first index is layer, then ... + std::vector> params; + std::vector> lparams; + + // first index is layer, then ... 
+ std::vector> param_vars; + std::vector>> lparam_vars; + + // first index is time, second is layer + std::vector> h, c; + + // initial values of h and c at each layer + // - both default to zero matrix input + bool has_initial_state; // if this is false, treat h0 and c0 as 0 + std::vector h0; + std::vector c0; + unsigned layers; + unsigned N; // Max branching factor +private: + ComputationGraph* cg; + + DYNET_SERIALIZE_DECLARE() +}; + +struct UnidirectionalTreeLSTMBuilder : public TreeLSTMBuilder { + UnidirectionalTreeLSTMBuilder() = default; + explicit UnidirectionalTreeLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model); + + Expression add_input(int id, std::vector children, const Expression& x) override; + protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + + public: + LSTMBuilder node_builder; + std::vector h; + +private: + DYNET_SERIALIZE_DECLARE() +}; + +struct BidirectionalTreeLSTMBuilder : public TreeLSTMBuilder { + BidirectionalTreeLSTMBuilder() = default; + explicit BidirectionalTreeLSTMBuilder(unsigned layers, + unsigned input_dim, + unsigned hidden_dim, + Model& model); + + Expression add_input(int id, std::vector children, const Expression& x) override; + protected: + void new_graph_impl(ComputationGraph& cg) override; + void start_new_sequence_impl(const std::vector& h0) override; + Expression set_h_impl(int prev, const std::vector& h_new) override; + + public: + LSTMBuilder fwd_node_builder; + LSTMBuilder rev_node_builder; + std::vector h; + +private: + DYNET_SERIALIZE_DECLARE() +}; +} // namespace dynet + +BOOST_CLASS_EXPORT_KEY(dynet::TreeLSTMBuilder) +BOOST_CLASS_EXPORT_KEY(dynet::NaryTreeLSTMBuilder) +BOOST_CLASS_EXPORT_KEY(dynet::UnidirectionalTreeLSTMBuilder) +BOOST_CLASS_EXPORT_KEY(dynet::BidirectionalTreeLSTMBuilder) diff --git a/thirdparty/dynet/dynet/weight-decay.cc b/thirdparty/dynet/dynet/weight-decay.cc new file mode 
100644 index 000000000..60918b17a --- /dev/null +++ b/thirdparty/dynet/dynet/weight-decay.cc @@ -0,0 +1,8 @@ +#include "dynet/weight-decay.h" + +namespace dynet { + +DYNET_SERIALIZE_COMMIT(L2WeightDecay, DYNET_SERIALIZE_DEFINE(weight_decay, lambda)) +DYNET_SERIALIZE_IMPL(L2WeightDecay) + +} diff --git a/thirdparty/dynet/dynet/weight-decay.h b/thirdparty/dynet/dynet/weight-decay.h new file mode 100644 index 000000000..2bca43a6a --- /dev/null +++ b/thirdparty/dynet/dynet/weight-decay.h @@ -0,0 +1,46 @@ +#ifndef DYNET_WEIGHT_DECAY_H +#define DYNET_WEIGHT_DECAY_H + +#include +#include +#include +#include "dynet/io-macros.h" + +namespace dynet { + +// I don't bother with learning rates when computing how much the weight +// decay changes- those are hard to define in the adaptive update rules. +// So, we do something simple that works with everything. +// +// Note: you may want to discount lambda as you learn if your eta is on a +// decaying schedule. +struct L2WeightDecay { + explicit L2WeightDecay(float lambda = 1e-6) : weight_decay(1) { set_lambda(lambda); } + void set_lambda(float lam) { + if (lam < 0) throw std::domain_error("Bad value of lambda in set_lambda"); + lambda = lam; + } + void update_weight_decay(unsigned num_updates = 1) { + if (num_updates == 0) return; + if (num_updates == 1) + weight_decay -= weight_decay * lambda; + else weight_decay = weight_decay * std::pow(1-lambda, num_updates); + } + float current_weight_decay() const { return weight_decay; } + bool parameters_need_rescaled() const { + return (weight_decay < 0.25f); + } + void reset_weight_decay() { + std::cerr << "RESCALE WEIGHT DECAY FROM " << weight_decay << " to 1.0\n"; + weight_decay = 1.0f; + } + private: + DYNET_SERIALIZE_DECLARE() + + float weight_decay; + float lambda; +}; + +} // namespace dynet + +#endif diff --git a/thirdparty/dynet/third_party/eigen_backward_spatial_convolutions.h b/thirdparty/dynet/third_party/eigen_backward_spatial_convolutions.h new file mode 100644 index 
000000000..3d8e2f357 --- /dev/null +++ b/thirdparty/dynet/third_party/eigen_backward_spatial_convolutions.h @@ -0,0 +1,503 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ + +#include +//#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +/** SpatialConvolutionBackwardInput + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Computes the backprop for the input of a 2D convolution. + * + * The output_backward parameter is expected to be a tensor with a rank of 3 or + * more (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, + * kernel_height, kernel_width) + * The output_backward and the kernel must both be in col-major layout. The + * result will also be in col-major layout. + * + * If row_in_stride, col_in_stride > 1, then applies convolution with holes + * (aka atrous convolution), sampling every row_in_stride, col_in_stride input + * pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the + * output_backward. The dimensions of the result will be filters, height, width + * (and others if applicable). 
+ * + * It is possible to swap the order of the width and height dimensions provided + * that the same order is used in the input, the kernel, and the output. + * + */ +#ifdef EIGEN_HAS_INDEX_LIST +typedef IndexList, type2index<0>, type2index<1>, type2index<1> > + ReverseColMajor; +typedef IndexList, type2index<1>, type2index<0>, type2index<0> > + ReverseRowMajor; +#else +typedef array ReverseColMajor; +typedef array ReverseRowMajor; +#endif + +template +EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array< + IndexPair::Index>, 1>, + const Eigen::TensorForcedEvalOp::Index, + 2>, + const TensorShufflingOp< + const array< + typename internal::traits::Index, 4>, + const TensorReverseOp > > >, + const TensorReshapingOp< + const DSizes::Index, + 2>, + const TensorImagePatchOp > > >, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array< + IndexPair::Index>, 1>, + const TensorReshapingOp< + const DSizes::Index, + 2>, + const TensorImagePatchOp >, + const Eigen::TensorForcedEvalOp::Index, + 2>, + const TensorShufflingOp< + const array< + typename internal::traits::Index, 4>, + const TensorReverseOp > > > > > >::type +SpatialConvolutionBackwardInput( + const Kernel& kernel, const OutputBackward& output_backward, + typename internal::traits::Index inputRows, + typename internal::traits::Index inputCols, + const DenseIndex row_stride = 1, const DenseIndex col_stride = 1, + const DenseIndex row_in_stride = 1, const DenseIndex col_in_stride = 1) { + typedef typename internal::traits::Index TensorIndex; + typedef typename internal::traits::Scalar OutScalar; + TensorRef::Scalar, + internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex> > + kern(kernel); + TensorRef::NumDimensions, + internal::traits::Layout, TensorIndex> 
> + out(output_backward); + + EIGEN_STATIC_ASSERT(internal::traits::Layout == + internal::traits::Layout, + YOU_MADE_A_PROGRAMMING_MISTAKE); + + static const bool isColMajor = + (internal::traits::Layout == ColMajor); + + static const int NumDims = internal::traits::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the + // result + const TensorIndex kernelFilters = + isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = + isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; + const TensorIndex kernelRows = + isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; + const TensorIndex kernelCols = + isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; + + // This is the effective kernel size, taking into account the (*_in_stride - + // 1) zero-values + // inserted between consecutive kernel elements in atrous convolution + const TensorIndex kernelRowsEff = + kernelRows + (kernelRows - 1) * (row_in_stride - 1); + const TensorIndex kernelColsEff = + kernelCols + (kernelCols - 1) * (col_in_stride - 1); + + const TensorIndex outputRows = isColMajor + ? output_backward.dimension(1) + : output_backward.dimension(NumDims - 2); + const TensorIndex outputCols = isColMajor + ? 
output_backward.dimension(2) + : output_backward.dimension(NumDims - 3); + + // Computing the forward padding + const TensorIndex forward_pad_top = numext::maxi( + 0, ((outputRows - 1) * row_stride + kernelRowsEff - inputRows) / 2); + const TensorIndex forward_pad_left = numext::maxi( + 0, ((outputCols - 1) * col_stride + kernelColsEff - inputCols) / 2); + const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; + const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; + + const TensorIndex padding_bottom = inputRows - (outputRows - 1) * row_stride - + 2 - padding_top + kernelRowsEff; + const TensorIndex padding_right = inputCols - (outputCols - 1) * col_stride - + 2 - padding_left + kernelColsEff; + + eigen_assert(padding_top >= 0); + eigen_assert(padding_left >= 0); + eigen_assert(padding_bottom >= 0); + eigen_assert(padding_right >= 0); + + // The kernel has dimensions filters X channels X patch_rows X patch_cols + // We need to reverse the kernel along dimensions corresponding to rows and + // cols. + // TODO(yangke): we can make things slightly faster by collapsing the + // dimensions + // where we don't reverse. Try that once we have a faster compiler. 
+ typedef typename internal::conditional::type Reverse; + Reverse kernel_reverse; + +#ifndef EIGEN_HAS_INDEX_LIST + if (isColMajor) { + kernel_reverse[0] = false; + kernel_reverse[1] = false; + kernel_reverse[2] = true; + kernel_reverse[3] = true; + } else { + kernel_reverse[0] = true; + kernel_reverse[1] = true; + kernel_reverse[2] = false; + kernel_reverse[3] = false; + } +#endif + + // Reorder the dimensions to filters X patch_rows X patch_cols X channels + array kernel_shuffle; + if (isColMajor) { + kernel_shuffle[0] = 0; + kernel_shuffle[1] = 2; + kernel_shuffle[2] = 3; + kernel_shuffle[3] = 1; + } else { + kernel_shuffle[0] = 2; + kernel_shuffle[1] = 0; + kernel_shuffle[2] = 1; + kernel_shuffle[3] = 3; + } + + // Collapse the dims + DSizes kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters * kernelRows * kernelCols; + kernel_dims[1] = kernelChannels; + } else { + kernel_dims[1] = kernelFilters * kernelRows * kernelCols; + kernel_dims[0] = kernelChannels; + } + + // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS + // When we extract the image patches from output_backward, it will have + // dimensions + // out_depth X (patch_rows * patch_cols) X (input_rows * input_cols * + // OTHERS) + DSizes pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelFilters * kernelRows * kernelCols; + pre_contract_dims[1] = inputRows * inputCols; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[1] *= out.dimension(i); + } + } else { + pre_contract_dims[1] = kernelFilters * kernelRows * kernelCols; + pre_contract_dims[0] = inputRows * inputCols; + for (int i = 0; i < NumDims - 3; ++i) { + pre_contract_dims[0] *= out.dimension(i); + } + } + + // We will contract along the fused dimension that contains the kernelFilters, + // the kernelRows and the kernelCols. 
+ array, 1> contract_dims; + if (isColMajor) { + // col-major: kernel.contract(output.patches) + contract_dims[0] = IndexPair(0, 0); + } else { + // row-major: output.patches.contract(kernel) + contract_dims[0] = IndexPair(1, 1); + } + + // Post contraction, the dimensions of the input_backprop is + // channels X input_rows X input_cols X OTHERS + DSizes post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelChannels; + post_contract_dims[1] = inputRows; + post_contract_dims[2] = inputCols; + for (int i = 3; i < NumDims; ++i) { + post_contract_dims[i] = out.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelChannels; + post_contract_dims[NumDims - 2] = inputRows; + post_contract_dims[NumDims - 3] = inputCols; + for (int i = 0; i < NumDims - 3; ++i) { + post_contract_dims[i] = out.dimension(i); + } + } + + return choose( + Cond::Layout == ColMajor>(), + kernel.reverse(kernel_reverse) + .shuffle(kernel_shuffle) + .reshape(kernel_dims) + .eval() + .contract( + output_backward + .extract_image_patches( + kernelRows, kernelCols, 1, 1, row_in_stride, + col_in_stride, row_stride, col_stride, padding_top, + padding_bottom, padding_left, padding_right, OutScalar(0)) + .reshape(pre_contract_dims), + contract_dims) + .reshape(post_contract_dims), + output_backward + .extract_image_patches(kernelRows, kernelCols, 1, 1, row_in_stride, + col_in_stride, row_stride, col_stride, + padding_top, padding_bottom, padding_left, + padding_right, OutScalar(0)) + .reshape(pre_contract_dims) + .contract(kernel.reverse(kernel_reverse) + .shuffle(kernel_shuffle) + .reshape(kernel_dims) + .eval(), + contract_dims) + .reshape(post_contract_dims)); +} + +/** SpatialConvolutionBackwardKernel + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Computes the backprop for the filter of a 2D convolution. 
+ * + * The output_backward parameter is expected to be a tensor with a rank of 3 or + * more (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, + * kernel_height, kernel_width) + * The output_backward and the kernel must both be in col-major layout. The + * result will also be in col-major layout. + * + * If row_in_stride, col_stride > 1, then applies convolution with holes (aka + * atrous convolution), sampling every row_in_stride, col_in_stride input + * pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the + * output_backward. The dimensions of the result will be filters, height, width + * (and others if applicable). + * + * It is possible to swap the order of the width and height dimensions provided + * that the same order is used in the input, the kernel, and the output. + * + */ + +template +EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, 4>, + const TensorContractionOp< + const array::Index>, 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const OutputBackward>, + const TensorShufflingOp< + const array::Index, + 2>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > > > >, + TensorReshapingOp< + const DSizes::Index, 4>, + const TensorContractionOp< + const array::Index>, 1>, + const TensorShufflingOp< + const array::Index, + 2>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const OutputBackward> > > >::type +SpatialConvolutionBackwardKernel( + const Input& input, const OutputBackward& output_backward, + typename internal::traits::Index kernelRows, + typename internal::traits::Index kernelCols, + const DenseIndex row_stride = 1, const DenseIndex col_stride = 1, + const DenseIndex row_in_stride = 1, const DenseIndex 
col_in_stride = 1) { + typedef typename internal::traits::Index TensorIndex; + typedef typename internal::traits::Scalar OutScalar; + TensorRef::Scalar, + internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex> > + in(input); + TensorRef::NumDimensions, + internal::traits::Layout, TensorIndex> > + out(output_backward); + + EIGEN_STATIC_ASSERT(internal::traits::Layout == + internal::traits::Layout, + YOU_MADE_A_PROGRAMMING_MISTAKE); + + // stride and in_stride cannot both be larger than 1 + eigen_assert(!(row_stride > 1 && row_in_stride > 1) && + !(col_stride > 1 && col_in_stride > 1)); + + static const bool isColMajor = (internal::traits::Layout == ColMajor); + + static const int NumDims = internal::traits::NumDimensions; + EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == + internal::traits::NumDimensions, + YOU_MADE_A_PROGRAMMING_MISTAKE); + + const TensorIndex inputRows = + isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex inputCols = + isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + + const TensorIndex outputRows = isColMajor + ? output_backward.dimension(1) + : output_backward.dimension(NumDims - 2); + const TensorIndex outputCols = isColMajor + ? output_backward.dimension(2) + : output_backward.dimension(NumDims - 3); + + // Number of filters to apply. This is the same as the output depth of the + // result + const TensorIndex kernelFilters = + isColMajor ? out.dimensions()[0] : out.dimensions()[NumDims - 1]; + + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = + isColMajor ? 
in.dimensions()[0] : in.dimensions()[NumDims - 1]; + + // This is the effective kernel size, taking into account the (*_in_stride - + // 1) zero-values + // inserted between consecutive kernel elements in atrous convolution + const TensorIndex kernelRowsEff = + kernelRows + (kernelRows - 1) * (row_in_stride - 1); + const TensorIndex kernelColsEff = + kernelCols + (kernelCols - 1) * (col_in_stride - 1); + + // Computing the forward padding + const TensorIndex padRows = numext::maxi( + 0, (outputRows - 1) * row_stride + kernelRowsEff - inputRows); + const TensorIndex padCols = numext::maxi( + 0, (outputCols - 1) * col_stride + kernelColsEff - inputCols); + const TensorIndex padding_top = padRows / 2; + const TensorIndex padding_bottom = padRows - padding_top; + const TensorIndex padding_left = padCols / 2; + const TensorIndex padding_right = padCols - padding_left; + + // Reshaped out + DSizes output_dims; + if (isColMajor) { + output_dims[0] = kernelFilters; + output_dims[1] = outputRows * outputCols; + for (int i = 3; i < NumDims; ++i) { + output_dims[1] *= out.dimension(i); + } + } else { + output_dims[1] = kernelFilters; + output_dims[0] = outputCols * outputRows; + for (int i = 0; i < NumDims - 3; ++i) { + output_dims[0] *= out.dimension(i); + } + } + + // Reshaped extract_image_patches(in) + DSizes pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[1] = outputRows * outputCols; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[1] *= in.dimension(i); + } + eigen_assert(output_dims[1] == pre_contract_dims[1]); + } else { + pre_contract_dims[1] = kernelCols * kernelRows * kernelChannels; + pre_contract_dims[0] = outputRows * outputCols; + for (int i = 0; i < NumDims - 3; ++i) { + pre_contract_dims[0] *= in.dimension(i); + } + eigen_assert(output_dims[0] == pre_contract_dims[0]); + } + + array shuffle_dims; + shuffle_dims[0] = 1; + shuffle_dims[1] = 0; + + array, 1> contract_dims; + 
contract_dims[0] = IndexPair(1, 0); + + // After the contraction, the kernel will have the desired shape + // out_depth X in_shape X kernel_rows X kernel_cols + DSizes kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels; + kernel_dims[2] = kernelRows; + kernel_dims[3] = kernelCols; + } else { + kernel_dims[3] = kernelFilters; + kernel_dims[2] = kernelChannels; + kernel_dims[1] = kernelRows; + kernel_dims[0] = kernelCols; + } + + return choose( + Cond::Layout == ColMajor>(), + output_backward.reshape(output_dims) + .contract( + input + .extract_image_patches( + kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, 1, 1, padding_top, + padding_bottom, padding_left, padding_right, OutScalar(0)) + .reshape(pre_contract_dims) + .shuffle(shuffle_dims), + contract_dims) + .reshape(kernel_dims), + input + .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, 1, 1, + padding_top, padding_bottom, padding_left, + padding_right, OutScalar(0)) + .reshape(pre_contract_dims) + .shuffle(shuffle_dims) + .contract(output_backward.reshape(output_dims), contract_dims) + .reshape(kernel_dims)); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H diff --git a/thirdparty/dynet/third_party/eigen_spatial_convolutions.h b/thirdparty/dynet/third_party/eigen_spatial_convolutions.h new file mode 100644 index 000000000..273771430 --- /dev/null +++ b/thirdparty/dynet/third_party/eigen_spatial_convolutions.h @@ -0,0 +1,1070 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_ + +#include +//#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +namespace internal { + +// TODO: Consolidate this part of the code with the image patch extraction code +// since they are both very similar. +template +class TensorContractionInputMapper< + Scalar_, Index, Side, + TensorEvaluator< + const TensorReshapingOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> { + public: + typedef Scalar_ Scalar; + typedef TensorContractionInputMapper< + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + Self; + typedef TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + SubMapper; + typedef SubMapper VectorMapper; + typedef SubMapper LinearMapper; + typedef typename packet_traits::type Packet; + + EIGEN_DEVICE_FUNC + TensorContractionInputMapper( + const TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>& tensor, + const nocontract_t&, const 
nocontract_t&, const contract_t&, + const contract_t&) + : m_impl(tensor.impl().impl()) { + Index patch_rows; + Index patch_depth; + if (internal::traits::Layout == ColMajor) { + patch_depth = tensor.impl().dimensions()[0]; + patch_rows = tensor.impl().dimensions()[1]; + m_patch_cols = tensor.impl().dimensions()[2]; + m_num_patches = tensor.impl().dimensions()[3]; + } else { + const int NumDims = tensor.impl().dimensions().size(); + patch_depth = tensor.impl().dimensions()[NumDims - 1]; + patch_rows = tensor.impl().dimensions()[NumDims - 2]; + m_patch_cols = tensor.impl().dimensions()[NumDims - 3]; + m_num_patches = tensor.impl().dimensions()[NumDims - 4]; + } + m_patch_row_inflate_strides = tensor.impl().rowInflateStride(); + m_patch_col_inflate_strides = tensor.impl().colInflateStride(); + + m_colStride = patch_rows; + + m_outputRows = tensor.impl().outputRows(); + m_row_strides = tensor.impl().userRowStride(); + m_col_strides = tensor.impl().userColStride(); + + m_in_row_strides = tensor.impl().userInRowStride(); + m_in_col_strides = tensor.impl().userInColStride(); + + if (internal::traits::Layout == ColMajor) { + m_inputRows = tensor.impl().impl().dimensions()[1]; + m_inputCols = tensor.impl().impl().dimensions()[2]; + } else { + const int NumDims = tensor.impl().impl().dimensions().size(); + m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2]; + m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3]; + } + + m_rowInputStride = patch_depth; + m_colInputStride = patch_depth * m_inputRows; + m_patchInputStride = patch_depth * m_inputRows * m_inputCols; + + m_rowPaddingTop = tensor.impl().rowPaddingTop(); + m_colPaddingLeft = tensor.impl().colPaddingLeft(); + + m_fastInputRowStride = + internal::TensorIntDivisor(m_patch_row_inflate_strides); + m_fastInputColStride = + internal::TensorIntDivisor(m_patch_col_inflate_strides); + m_fastNumPatches = internal::TensorIntDivisor(m_num_patches); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + 
m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); + m_fastDimZero = internal::TensorIntDivisor(patch_depth); + } + + EIGEN_DEVICE_FUNC + TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) + : m_impl(base_mapper.m_impl) { + m_patch_cols = base_mapper.m_patch_cols; + m_num_patches = base_mapper.m_num_patches; + m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides; + m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides; + + m_colStride = base_mapper.m_colStride; + + m_rowInputStride = base_mapper.m_rowInputStride; + m_colInputStride = base_mapper.m_colInputStride; + m_patchInputStride = base_mapper.m_patchInputStride; + + m_inputRows = base_mapper.m_inputRows; + m_inputCols = base_mapper.m_inputCols; + + m_outputRows = base_mapper.m_outputRows; + m_row_strides = base_mapper.m_row_strides; + m_col_strides = base_mapper.m_col_strides; + + m_in_row_strides = base_mapper.m_in_row_strides; + m_in_col_strides = base_mapper.m_in_col_strides; + + m_rowPaddingTop = base_mapper.m_rowPaddingTop; + m_colPaddingLeft = base_mapper.m_colPaddingLeft; + + m_fastInputRowStride = base_mapper.m_fastInputRowStride; + m_fastInputColStride = base_mapper.m_fastInputColStride; + m_fastNumPatches = base_mapper.m_fastNumPatches; + m_fastColStride = base_mapper.m_fastColStride; + m_fastOutputRows = base_mapper.m_fastOutputRows; + m_fastDimZero = base_mapper.m_fastDimZero; + } + + // If true, turns off some optimizations for loading packets since the image + // patches are "non-standard" such as there are non-trivial strides or + // inflations in the input. 
+ EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { + return m_in_row_strides != 1 || m_in_col_strides != 1 || + m_patch_row_inflate_strides != 1 || m_patch_col_inflate_strides != 1; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar operator()(Index row) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(0, rowIndex, colIndex, otherIndex); + return loadCoeff(row, rowIndex, colIndex, otherIndex); + } + + // Load the coefficient at the patchIndex location instead of the usual + // m_rowIndex, + // m_colIndex, m_otherIndex. This is currently only used by the gpu code. + // EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row, Index patchIndex) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex); + return loadCoeff(row, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index row) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(0, rowIndex, colIndex, otherIndex); + return loadPacket(row, rowIndex, colIndex, otherIndex); + } + + // Load the packet at the patchIndex location instead of the usual m_rowIndex, + // m_colIndex, m_otherIndex. This is currently only used by the gpu code. 
+ EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index row, Index patchIndex) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex); + return loadPacket(row, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE const TensorEvaluator& impl() const { + return m_impl; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_rowInputStride; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchRows() const { return m_colStride; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchCols() const { return m_patch_cols; } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, + const Index baseIndex) const { + const Index inputIndex = depth + baseIndex; + return m_impl.template packet(inputIndex); + } + + private: + friend class TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment>; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index rowIndex, + Index colIndex, Index otherIndex) const { + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset * m_in_col_strides; + const Index origInputCol = + (m_patch_col_inflate_strides == 1) + ? inputCol + : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex + rowOffset * m_in_row_strides; + const Index origInputRow = + (m_patch_row_inflate_strides == 1) + ? inputRow + : ((inputRow >= 0) ? 
(inputRow / m_fastInputRowStride) : 0); + if (origInputCol < 0 || origInputRow < 0 || origInputCol >= m_inputCols || + origInputRow >= m_inputRows || + (inputCol != origInputCol * m_patch_col_inflate_strides) || + (inputRow != origInputRow * m_patch_row_inflate_strides)) { + return Scalar(0); + } + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + origInputRow * m_rowInputStride + + origInputCol * m_colInputStride + otherIndex; + return m_impl.coeff(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index rowIndex, + Index colIndex, + Index otherIndex) const { + eigen_assert(!nonStandardPatches()); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset; + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex + rowOffset; + if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 || + inputRow >= m_inputRows) { + return Scalar(0); + } + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + inputRow * m_rowInputStride + + inputCol * m_colInputStride + otherIndex; + return m_impl.coeff(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index rowIndex, + Index colIndex, + Index otherIndex) const { + const Index packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols); + + if (nonStandardPatches()) { + return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); + } + return loadPacketStandard(patchId, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index patchId, Index rowIndex, + 
Index colIndex, + Index otherIndex) const { + const Index packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols); + + eigen_assert(!nonStandardPatches()); + + if ((patchDepth() % packetSize) == 0) { + return loadPacketFast(patchId, rowIndex, colIndex, otherIndex); + } else { + const Index patchOffsets[2] = { + patchId / m_fastDimZero, (patchId + packetSize - 1) / m_fastDimZero}; + + const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, + patchOffsets[1] / m_fastColStride}; + + const Index inputCols[2] = {colIndex + colOffsets[0], + colIndex + colOffsets[1]}; + if (inputCols[0] >= m_inputCols || inputCols[1] < 0) { + // all zeros + return internal::pset1(Scalar(0)); + } + + if (inputCols[0] == inputCols[1]) { + const Index rowOffsets[2] = { + patchOffsets[0] - colOffsets[0] * m_colStride, + patchOffsets[1] - colOffsets[1] * m_colStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + const Index inputRows[2] = {rowIndex + rowOffsets[0], + rowIndex + rowOffsets[1]}; + + if (inputRows[0] >= m_inputRows || inputRows[1] < 0) { + // all zeros + return internal::pset1(Scalar(0)); + } + + if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { + // no padding + const Index depth = patchId - patchOffsets[0] * patchDepth(); + const Index inputIndex = depth + inputRows[0] * m_rowInputStride + + inputCols[0] * m_colInputStride + otherIndex; + return m_impl.template packet(inputIndex); + } + } + } + return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index patchId, Index rowIndex, + Index colIndex, + Index otherIndex) const { + const Index packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < patchDepth() * patchRows() * m_patch_cols); + + 
eigen_assert(!nonStandardPatches()); + eigen_assert((patchDepth() % packetSize) == 0); + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset); + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset; + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex + rowOffset; + if (inputCol < 0 || inputRow < 0 || inputCol >= m_inputCols || + inputRow >= m_inputRows) { + // all zeros + return internal::pset1(Scalar(0)); + } + // no padding + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + inputRow * m_rowInputStride + + inputCol * m_colInputStride + otherIndex; + return m_impl.template packet(inputIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet packetWithPossibleZero( + Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX + typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = loadCoeff(patchId + i, rowIndex, colIndex, otherIndex); + } + Packet rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices( + Index patchIndex, Index& rowIndex, Index& colIndex, + Index& otherIndex) const { + const int NumInputDims = array_size< + typename TensorEvaluator::Dimensions>::value; + otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches; + const Index patch2DIndex = (NumInputDims == 3) + ? 
patchIndex + : (patchIndex - otherIndex * m_num_patches); + otherIndex *= m_patchInputStride; + colIndex = patch2DIndex / m_fastOutputRows; + rowIndex = patch2DIndex - colIndex * m_outputRows; + colIndex = colIndex * m_col_strides - m_colPaddingLeft; + rowIndex = rowIndex * m_row_strides - m_rowPaddingTop; + } + + Index m_patch_cols; // number of columns in the patch + Index m_num_patches; // number of patches to extract. + Index m_patch_row_inflate_strides; // the strides for row inflation in the + // image patch + Index m_patch_col_inflate_strides; // the strides for col inflation in the + // image patch + // Fast representation of inflation strides. + internal::TensorIntDivisor m_fastInputRowStride; + internal::TensorIntDivisor m_fastInputColStride; + + Index m_otherStride; + Index m_colStride; + internal::TensorIntDivisor m_fastNumPatches; + internal::TensorIntDivisor m_fastColStride; + + Index m_rowInputStride; // row stride in the input tensor + Index m_colInputStride; // col stride in the input tensor + Index m_patchInputStride; // patch stride in the input tensor + + Index m_inputRows; // Number of rows in the input tensor + Index m_inputCols; // Number of cols in the input tensor + + Index m_outputRows; // Number of patch rows + + Index m_row_strides; // User specified row stride + Index m_col_strides; // User specified col stride + + Index m_in_row_strides; // User specified input row stride + Index m_in_col_strides; // User specified input col stride + + Index m_rowPaddingTop; // Row padding + Index m_colPaddingLeft; // Column padding + + internal::TensorIntDivisor m_fastOutputRows; + internal::TensorIntDivisor m_fastDimZero; + + const TensorEvaluator m_impl; +}; + +template +class TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> { + public: + typedef typename packet_traits::type Packet; + typedef
typename packet_traits::half HalfPacket; + + typedef TensorContractionInputMapper< + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + ParentMapper; + typedef TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + Self; + typedef Self LinearMapper; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper( + const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), + m_depth_offset(vert_offset), + m_col_offset(horiz_offset) { + m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, + m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper( + const Self& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper.m_base_mapper), + m_depth_offset(vert_offset + base_mapper.m_depth_offset), + m_col_offset(horiz_offset + base_mapper.m_col_offset) { + m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, + m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_base_mapper.loadCoeff(i + m_depth_offset, m_rowIndex, m_colIndex, + m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, + Index j) const { + return m_base_mapper(i + m_depth_offset, j + m_col_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return m_base_mapper.loadPacket(i + m_depth_offset, m_rowIndex, m_colIndex, + m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, + Index j) const { + return m_base_mapper.template loadPacket(i + m_depth_offset, + j + 
m_col_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar + loadCoeffStandard(Index i) const { + return m_base_mapper.loadCoeffStandard(i + m_depth_offset, m_rowIndex, + m_colIndex, m_otherIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index i) const { + return m_base_mapper.loadPacketFast(i + m_depth_offset, m_rowIndex, + m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet + loadPacketStandard(Index i) const { + return m_base_mapper.loadPacketStandard(i + m_depth_offset, m_rowIndex, + m_colIndex, m_otherIndex); + } + template + EIGEN_DEVICE_FUNC bool aligned(Index) const { + return false; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { + return m_base_mapper.nonStandardPatches(); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchDepth() const { + return m_base_mapper.m_rowInputStride; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchRows() const { + return m_base_mapper.m_colStride; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchCols() const { + return m_base_mapper.m_patch_cols; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, + const Index baseIndex) const { + const Index inputIndex = depth + baseIndex; + return m_base_mapper.m_impl.template packet(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padRow(const Index row) const { + const Index r = m_rowIndex + row; + return r < 0 || r >= m_base_mapper.m_inputRows; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padCol(const Index col) const { + const Index c = m_colIndex + col; + return c < 0 || c >= m_base_mapper.m_inputCols; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index baseIndex(const Index row, const Index col) const { + const Index r = m_rowIndex + row; + const Index c = m_colIndex + col; + return r * m_base_mapper.m_rowInputStride + + c * m_base_mapper.m_colInputStride + m_otherIndex; + } + + EIGEN_DEVICE_FUNC + 
EIGEN_ALWAYS_INLINE Index rowOffset() const { + const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; + const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; + return patchOffset - colOffset * m_base_mapper.m_colStride; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index colOffset() const { + const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; + const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; + return colOffset; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index depthOffset() const { + const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth(); + return patchOffset; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper + getLinearMapper(Index i, Index j) const { + return LinearMapper(m_base_mapper, i + m_depth_offset, j + m_col_offset); + } + + private: + const ParentMapper& m_base_mapper; // that was a reference before + Index m_depth_offset; // First row in the input matrix + Index m_col_offset; // First col in the input matrix + + Index m_rowIndex; // precomputed row index corresponding to the col offset + Index m_colIndex; // precomputed col index corresponding to the col offset + Index + m_otherIndex; // precomputed other index corresponding to the col offset +}; + +template +struct gemm_pack_rhs< + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { + typedef TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + SubMapper; + typedef SubMapper DataMapper; + + EIGEN_DEVICE_FUNC + static inline Index ceil_div(Index a, Index b) { return 
(a + b - 1) / b; } + + EIGEN_DEVICE_FUNC + EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0) const { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + typedef typename packet_traits::type Packet; + + const Index packet_cols4 = (cols / 4) * 4; + const Index peeled_k = (depth / packet_size) * packet_size; + const bool non_standard_patches = rhs.nonStandardPatches(); + + for (Index j2 = 0; j2 < packet_cols4; j2 += 4) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + Index k = 0; + if ((packet_size % 4) == 0 && !non_standard_patches) { + const Index patch_depth = rhs.patchDepth(); + if ((patch_depth % packet_size) == 0) { + const Index patch_cols = rhs.patchCols(); + const Index patch_rows = rhs.patchRows(); + + const Index startCol = rhs.colOffset(); + const Index max_cols = std::min( + ceil_div(peeled_k, patch_rows * patch_depth) + startCol, + patch_cols); + + for (Index c = startCol; c < max_cols; ++c) { + eigen_assert(k < peeled_k); + const Index startRow = (c == startCol) ? 
rhs.rowOffset() : 0; + const Index max_rows = std::min( + ceil_div(peeled_k - c * patch_rows * patch_depth, patch_depth) + + startRow, + patch_rows); + + const bool pad_col0 = dm0.padCol(c); + const bool pad_col1 = dm1.padCol(c); + const bool pad_col2 = dm2.padCol(c); + const bool pad_col3 = dm3.padCol(c); + for (Index r = startRow; r < max_rows; ++r) { + eigen_assert(k < peeled_k); + const bool pad0 = pad_col0 || dm0.padRow(r); + const bool pad1 = pad_col1 || dm1.padRow(r); + const bool pad2 = pad_col2 || dm2.padRow(r); + const bool pad3 = pad_col3 || dm3.padRow(r); + + const Index idx0 = dm0.baseIndex(r, c); + const Index idx1 = dm1.baseIndex(r, c); + const Index idx2 = dm2.baseIndex(r, c); + const Index idx3 = dm3.baseIndex(r, c); + + const Index startDepth = + ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0; + const Index max_depth = + std::min(peeled_k - c * patch_rows * patch_depth - + r * patch_depth + startDepth, + patch_depth); + eigen_assert((max_depth - startDepth) % packet_size == 0); + for (Index d = startDepth; d < max_depth; d += packet_size) { + eigen_assert(k < peeled_k); + PacketBlock kernel; + kernel.packet[0] = pad0 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx0); + kernel.packet[1] = pad1 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx1); + kernel.packet[2] = pad2 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx2); + kernel.packet[3] = pad3 ? 
pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx3); + ptranspose(kernel); + pstoreu(block + 0 * packet_size, kernel.packet[0]); + pstoreu(block + 1 * packet_size, kernel.packet[1]); + pstoreu(block + 2 * packet_size, kernel.packet[2]); + pstoreu(block + 3 * packet_size, kernel.packet[3]); + block += 4 * packet_size; + k += packet_size; + } + } + } + + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel; + kernel.packet[0] = dm0.loadPacketFast(k); + kernel.packet[1] = dm1.loadPacketFast(k); + kernel.packet[2] = dm2.loadPacketFast(k); + kernel.packet[3] = dm3.loadPacketFast(k); + ptranspose(kernel); + pstoreu(block + 0 * packet_size, kernel.packet[0]); + pstoreu(block + 1 * packet_size, kernel.packet[1]); + pstoreu(block + 2 * packet_size, kernel.packet[2]); + pstoreu(block + 3 * packet_size, kernel.packet[3]); + block += 4 * packet_size; + } + } else { + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel; + kernel.packet[0] = dm0.loadPacketStandard(k); + kernel.packet[1] = dm1.loadPacketStandard(k); + kernel.packet[2] = dm2.loadPacketStandard(k); + kernel.packet[3] = dm3.loadPacketStandard(k); + ptranspose(kernel); + pstoreu(block + 0 * packet_size, kernel.packet[0]); + pstoreu(block + 1 * packet_size, kernel.packet[1]); + pstoreu(block + 2 * packet_size, kernel.packet[2]); + pstoreu(block + 3 * packet_size, kernel.packet[3]); + block += 4 * packet_size; + } + } + } + if (!rhs.nonStandardPatches()) { + for (; k < depth; k++) { + block[0] = dm0.loadCoeffStandard(k); + block[1] = dm1.loadCoeffStandard(k); + block[2] = dm2.loadCoeffStandard(k); + block[3] = dm3.loadCoeffStandard(k); + block += 4; + } + } else { + for (; k < depth; k++) { + block[0] = dm0(k); + block[1] = dm1(k); + block[2] = dm2(k); + block[3] = dm3(k); + block += 4; + } + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2); + for (Index k = 0; k < depth; k++) { + *block = 
dm0(k); + block += 1; + } + } + } +}; + +// Special case for non-vectorized types such as float16. +template +struct gemm_pack_rhs< + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, + Alignment>, + nr, ColMajor, false, false> { + typedef TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator< + const TensorReshapingOp< + NewDimension, const TensorImagePatchOp >, + Device>, + nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, + Alignment> + SubMapper; + typedef SubMapper DataMapper; + + EIGEN_DEVICE_FUNC + static inline Index ceil_div(Index a, Index b) { return (a + b - 1) / b; } + + EIGEN_DEVICE_FUNC + EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0) const { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const Index packet_cols4 = (cols / 4) * 4; + + for (Index j2 = 0; j2 < packet_cols4; j2 += 4) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + if (!rhs.nonStandardPatches()) { + for (Index k = 0; k < depth; k++) { + block[0] = dm0.loadCoeffStandard(k); + block[1] = dm1.loadCoeffStandard(k); + block[2] = dm2.loadCoeffStandard(k); + block[3] = dm3.loadCoeffStandard(k); + block += 4; + } + } else { + for (Index k = 0; k < depth; k++) { + block[0] = dm0(k); + block[1] = dm1(k); + block[2] = dm2(k); + block[3] = dm3(k); + block += 4; + } + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2); 
+ for (Index k = 0; k < depth; k++) { + *block = dm0(k); + block += 1; + } + } + } +}; + +} // end namespace internal + +/** SpatialConvolution + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a 2D convolution over a multichannel input image. + * + * The input parameter is expected to be a tensor with a rank of 3 or more + * (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, + * kernel_height, kernel_width) + * The input and the kernel must both be in col-major layout. The result will + * also be in col-major layout. + * + * If col_in_stride, row_in_stride > 1, then applies convolution with holes + * (aka atrous convolution), sampling every col_in_stride, row_in_stride input + * pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the + * input. The dimensions of the result will be filters, height, width (and + * others if applicable). + * + * It is possible to swap the order of the width and height dimensions provided + * that the same order is used in the input, the kernel, and the output. 
+ * + */ +template +EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits::Layout == ColMajor, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > > >, + TensorReshapingOp< + const DSizes::Index, + internal::traits::NumDimensions>, + const TensorContractionOp< + const array::Index>, + 1>, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const Kernel> > > >::type + SpatialConvolution(const Input& input, const Kernel& kernel, + const DenseIndex row_stride = 1, + const DenseIndex col_stride = 1, + const PaddingType padding_type = PADDING_SAME, + const DenseIndex row_in_stride = 1, + const DenseIndex col_in_stride = 1) { + typedef typename internal::traits::Index TensorIndex; + TensorRef::Scalar, + internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex> > + in(input); + TensorRef::Scalar, + internal::traits::NumDimensions, + internal::traits::Layout, TensorIndex> > + kern(kernel); + + EIGEN_STATIC_ASSERT( + internal::traits::Layout == internal::traits::Layout, + YOU_MADE_A_PROGRAMMING_MISTAKE); + const bool isColMajor = (internal::traits::Layout == ColMajor); + + const int NumDims = internal::traits::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the + // result + const TensorIndex kernelFilters = + isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = + isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; + const TensorIndex kernelRows = + isColMajor ? 
kern.dimensions()[2] : kern.dimensions()[1]; + const TensorIndex kernelCols = + isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; + + const DenseIndex kernelRowsEff = + kernelRows + (kernelRows - 1) * (row_in_stride - 1); + const DenseIndex kernelColsEff = + kernelCols + (kernelCols - 1) * (col_in_stride - 1); + + array, 1> contract_dims; + contract_dims[0] = IndexPair(1, 0); + + const TensorIndex InputRows = + isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex InputCols = + isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + + TensorIndex out_height; + TensorIndex out_width; + switch (padding_type) { + case PADDING_VALID: + out_height = numext::ceil((InputRows - kernelRowsEff + 1.f) / + static_cast(row_stride)); + out_width = numext::ceil((InputCols - kernelColsEff + 1.f) / + static_cast(col_stride)); + break; + case PADDING_SAME: + out_height = numext::ceil(InputRows / static_cast(row_stride)); + out_width = numext::ceil(InputCols / static_cast(col_stride)); + break; + default: + eigen_assert(false && "unexpected padding"); + } + + // Molds the output of the patch extraction code into a 2d tensor: + // - the first dimension (dims[0]): the patch values to be multiplied with the + // kernels + // - the second dimension (dims[1]): everything else + DSizes pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[1] = out_height * out_width; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[1] *= in.dimension(i); + } + } else { + pre_contract_dims[1] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[0] = out_height * out_width; + for (int i = 0; i < NumDims - 3; ++i) { + pre_contract_dims[0] *= in.dimension(i); + } + } + + // Molds the output of the contraction into the shape expected by the used + // (assuming this is ColMajor): + // - 1st dim: kernel filters + // - 2nd dim: output height + // - 3rd dim: output width + // - 4th dim and 
beyond: everything else including batch size + DSizes post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelFilters; + post_contract_dims[1] = out_height; + post_contract_dims[2] = out_width; + for (int i = 3; i < NumDims; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelFilters; + post_contract_dims[NumDims - 2] = out_height; + post_contract_dims[NumDims - 3] = out_width; + for (int i = 0; i < NumDims - 3; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } + + DSizes kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels * kernelRows * kernelCols; + } else { + kernel_dims[0] = kernelChannels * kernelRows * kernelCols; + kernel_dims[1] = kernelFilters; + } + // TODO(yangke): choose() is defined in TensorContraction.h -- consider + // moving it to somewhere more "common". + return choose( + Cond::Layout == ColMajor>(), + kernel.reshape(kernel_dims) + .contract(input + .extract_image_patches( + kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, padding_type) + .reshape(pre_contract_dims), + contract_dims) + .reshape(post_contract_dims), + input + .extract_image_patches(kernelRows, kernelCols, row_stride, col_stride, + row_in_stride, col_in_stride, padding_type) + .reshape(pre_contract_dims) + .contract(kernel.reshape(kernel_dims), contract_dims) + .reshape(post_contract_dims)); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_ From 8f21b5a85a9a565755587ecc0899934e146ed1d0 Mon Sep 17 00:00:00 2001 From: liu946 Date: Sat, 3 Jun 2017 04:05:02 +0800 Subject: [PATCH 04/22] double eigen --- thirdparty/eigen-3.2.10/COPYING.BSD | 26 - thirdparty/eigen-3.2.10/Eigen/Array | 11 - thirdparty/eigen-3.2.10/Eigen/CMakeLists.txt | 19 - thirdparty/eigen-3.2.10/Eigen/CholmodSupport | 45 - thirdparty/eigen-3.2.10/Eigen/Eigen | 2 - 
thirdparty/eigen-3.2.10/Eigen/Eigen2Support | 95 - .../eigen-3.2.10/Eigen/IterativeLinearSolvers | 40 - thirdparty/eigen-3.2.10/Eigen/LeastSquares | 32 - thirdparty/eigen-3.2.10/Eigen/MetisSupport | 28 - thirdparty/eigen-3.2.10/Eigen/OrderingMethods | 66 - thirdparty/eigen-3.2.10/Eigen/PaStiXSupport | 46 - thirdparty/eigen-3.2.10/Eigen/PardisoSupport | 30 - thirdparty/eigen-3.2.10/Eigen/QtAlignedMalloc | 34 - thirdparty/eigen-3.2.10/Eigen/SPQRSupport | 29 - thirdparty/eigen-3.2.10/Eigen/Sparse | 27 - thirdparty/eigen-3.2.10/Eigen/SparseCholesky | 47 - thirdparty/eigen-3.2.10/Eigen/SparseCore | 64 - thirdparty/eigen-3.2.10/Eigen/SparseLU | 49 - thirdparty/eigen-3.2.10/Eigen/SparseQR | 33 - thirdparty/eigen-3.2.10/Eigen/StdDeque | 27 - thirdparty/eigen-3.2.10/Eigen/StdList | 26 - thirdparty/eigen-3.2.10/Eigen/StdVector | 27 - thirdparty/eigen-3.2.10/Eigen/SuperLUSupport | 59 - thirdparty/eigen-3.2.10/Eigen/UmfPackSupport | 36 - .../eigen-3.2.10/Eigen/src/CMakeLists.txt | 7 - .../Eigen/src/Cholesky/CMakeLists.txt | 6 - .../eigen-3.2.10/Eigen/src/Cholesky/LLT_MKL.h | 102 - .../Eigen/src/CholmodSupport/CMakeLists.txt | 6 - .../Eigen/src/CholmodSupport/CholmodSupport.h | 607 - .../eigen-3.2.10/Eigen/src/Core/Assign_MKL.h | 224 - .../Eigen/src/Core/CMakeLists.txt | 10 - .../src/Core/arch/AltiVec/CMakeLists.txt | 6 - .../Eigen/src/Core/arch/AltiVec/Complex.h | 217 - .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 501 - .../Eigen/src/Core/arch/CMakeLists.txt | 4 - .../src/Core/arch/Default/CMakeLists.txt | 6 - .../Eigen/src/Core/arch/NEON/CMakeLists.txt | 6 - .../Eigen/src/Core/arch/NEON/Complex.h | 253 - .../Eigen/src/Core/arch/NEON/PacketMath.h | 420 - .../Eigen/src/Core/arch/SSE/CMakeLists.txt | 6 - .../Eigen/src/Core/products/CMakeLists.txt | 6 - .../GeneralMatrixMatrixTriangular_MKL.h | 146 - .../Core/products/GeneralMatrixMatrix_MKL.h | 118 - .../Core/products/GeneralMatrixVector_MKL.h | 131 - .../products/SelfadjointMatrixMatrix_MKL.h | 295 - 
.../products/SelfadjointMatrixVector_MKL.h | 114 - .../products/TriangularMatrixMatrix_MKL.h | 309 - .../products/TriangularMatrixVector_MKL.h | 247 - .../products/TriangularSolverMatrix_MKL.h | 155 - .../Eigen/src/Core/util/CMakeLists.txt | 6 - .../Eigen/src/Core/util/NonMPL2.h | 3 - .../Eigen/src/Eigen2Support/Block.h | 126 - .../Eigen/src/Eigen2Support/CMakeLists.txt | 8 - .../Eigen/src/Eigen2Support/Cwise.h | 192 - .../Eigen/src/Eigen2Support/CwiseOperators.h | 298 - .../src/Eigen2Support/Geometry/AlignedBox.h | 159 - .../Eigen/src/Eigen2Support/Geometry/All.h | 115 - .../src/Eigen2Support/Geometry/AngleAxis.h | 214 - .../src/Eigen2Support/Geometry/CMakeLists.txt | 6 - .../src/Eigen2Support/Geometry/Hyperplane.h | 254 - .../Eigen2Support/Geometry/ParametrizedLine.h | 141 - .../src/Eigen2Support/Geometry/Quaternion.h | 495 - .../src/Eigen2Support/Geometry/Rotation2D.h | 145 - .../src/Eigen2Support/Geometry/RotationBase.h | 123 - .../src/Eigen2Support/Geometry/Scaling.h | 167 - .../src/Eigen2Support/Geometry/Transform.h | 786 - .../src/Eigen2Support/Geometry/Translation.h | 184 - .../eigen-3.2.10/Eigen/src/Eigen2Support/LU.h | 120 - .../Eigen/src/Eigen2Support/Lazy.h | 71 - .../Eigen/src/Eigen2Support/LeastSquares.h | 169 - .../Eigen/src/Eigen2Support/Macros.h | 20 - .../Eigen/src/Eigen2Support/MathFunctions.h | 57 - .../Eigen/src/Eigen2Support/Memory.h | 45 - .../Eigen/src/Eigen2Support/Meta.h | 75 - .../Eigen/src/Eigen2Support/Minor.h | 117 - .../eigen-3.2.10/Eigen/src/Eigen2Support/QR.h | 67 - .../Eigen/src/Eigen2Support/SVD.h | 637 - .../src/Eigen2Support/TriangularSolver.h | 42 - .../Eigen/src/Eigen2Support/VectorBlock.h | 94 - .../Eigen/src/Eigenvalues/CMakeLists.txt | 6 - .../Eigen/src/Eigenvalues/ComplexSchur_MKL.h | 93 - .../Eigen/src/Eigenvalues/RealSchur_MKL.h | 79 - .../Eigenvalues/SelfAdjointEigenSolver_MKL.h | 92 - .../Eigen/src/Geometry/CMakeLists.txt | 8 - .../Eigen/src/Geometry/arch/CMakeLists.txt | 6 - .../Eigen/src/Householder/CMakeLists.txt | 
6 - .../BasicPreconditioners.h | 149 - .../src/IterativeLinearSolvers/BiCGSTAB.h | 263 - .../src/IterativeLinearSolvers/CMakeLists.txt | 6 - .../ConjugateGradient.h | 258 - .../IterativeLinearSolvers/IncompleteLUT.h | 478 - .../IterativeSolverBase.h | 282 - .../Eigen/src/Jacobi/CMakeLists.txt | 6 - .../eigen-3.2.10/Eigen/src/LU/CMakeLists.txt | 8 - .../Eigen/src/LU/PartialPivLU_MKL.h | 85 - .../Eigen/src/LU/arch/CMakeLists.txt | 6 - .../Eigen/src/MetisSupport/CMakeLists.txt | 6 - .../Eigen/src/MetisSupport/MetisSupport.h | 137 - .../Eigen/src/OrderingMethods/Amd.h | 444 - .../Eigen/src/OrderingMethods/CMakeLists.txt | 6 - .../Eigen/src/OrderingMethods/Eigen_Colamd.h | 1843 -- .../Eigen/src/OrderingMethods/Ordering.h | 154 - .../Eigen/src/PaStiXSupport/CMakeLists.txt | 6 - .../Eigen/src/PaStiXSupport/PaStiXSupport.h | 729 - .../Eigen/src/PardisoSupport/CMakeLists.txt | 6 - .../Eigen/src/PardisoSupport/PardisoSupport.h | 603 - .../eigen-3.2.10/Eigen/src/QR/CMakeLists.txt | 6 - .../Eigen/src/QR/ColPivHouseholderQR_MKL.h | 98 - .../Eigen/src/QR/HouseholderQR_MKL.h | 71 - .../Eigen/src/SPQRSupport/CMakeLists.txt | 6 - .../src/SPQRSupport/SuiteSparseQRSupport.h | 338 - .../eigen-3.2.10/Eigen/src/SVD/CMakeLists.txt | 6 - .../Eigen/src/SVD/JacobiSVD_MKL.h | 92 - .../Eigen/src/SparseCholesky/CMakeLists.txt | 6 - .../src/SparseCholesky/SimplicialCholesky.h | 671 - .../SparseCholesky/SimplicialCholesky_impl.h | 199 - .../Eigen/src/SparseCore/AmbiVector.h | 373 - .../Eigen/src/SparseCore/CMakeLists.txt | 6 - .../Eigen/src/SparseCore/CompressedStorage.h | 240 - .../ConservativeSparseSparseProduct.h | 245 - .../Eigen/src/SparseCore/MappedSparseMatrix.h | 181 - .../Eigen/src/SparseCore/SparseBlock.h | 623 - .../Eigen/src/SparseCore/SparseColEtree.h | 206 - .../src/SparseCore/SparseCwiseBinaryOp.h | 324 - .../Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 163 - .../Eigen/src/SparseCore/SparseDenseProduct.h | 311 - .../src/SparseCore/SparseDiagonalProduct.h | 196 - 
.../Eigen/src/SparseCore/SparseDot.h | 101 - .../Eigen/src/SparseCore/SparseFuzzy.h | 26 - .../Eigen/src/SparseCore/SparseMatrix.h | 1262 -- .../Eigen/src/SparseCore/SparseMatrixBase.h | 462 - .../Eigen/src/SparseCore/SparsePermutation.h | 148 - .../Eigen/src/SparseCore/SparseProduct.h | 188 - .../Eigen/src/SparseCore/SparseRedux.h | 48 - .../src/SparseCore/SparseSelfAdjointView.h | 507 - .../SparseSparseProductWithPruning.h | 150 - .../Eigen/src/SparseCore/SparseTranspose.h | 63 - .../src/SparseCore/SparseTriangularView.h | 179 - .../Eigen/src/SparseCore/SparseUtil.h | 172 - .../Eigen/src/SparseCore/SparseVector.h | 448 - .../Eigen/src/SparseCore/SparseView.h | 99 - .../Eigen/src/SparseCore/TriangularSolver.h | 334 - .../Eigen/src/SparseLU/CMakeLists.txt | 6 - .../Eigen/src/SparseLU/SparseLU.h | 806 - .../Eigen/src/SparseLU/SparseLUImpl.h | 66 - .../Eigen/src/SparseLU/SparseLU_Memory.h | 227 - .../Eigen/src/SparseLU/SparseLU_Structs.h | 111 - .../src/SparseLU/SparseLU_SupernodalMatrix.h | 298 - .../Eigen/src/SparseLU/SparseLU_Utils.h | 80 - .../Eigen/src/SparseLU/SparseLU_column_bmod.h | 180 - .../Eigen/src/SparseLU/SparseLU_column_dfs.h | 177 - .../src/SparseLU/SparseLU_copy_to_ucol.h | 106 - .../Eigen/src/SparseLU/SparseLU_gemm_kernel.h | 279 - .../src/SparseLU/SparseLU_heap_relax_snode.h | 127 - .../Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 130 - .../Eigen/src/SparseLU/SparseLU_panel_bmod.h | 223 - .../Eigen/src/SparseLU/SparseLU_panel_dfs.h | 258 - .../Eigen/src/SparseLU/SparseLU_pivotL.h | 137 - .../Eigen/src/SparseLU/SparseLU_pruneL.h | 135 - .../Eigen/src/SparseLU/SparseLU_relax_snode.h | 83 - .../Eigen/src/SparseQR/CMakeLists.txt | 6 - .../Eigen/src/SparseQR/SparseQR.h | 714 - .../Eigen/src/StlSupport/CMakeLists.txt | 6 - .../Eigen/src/StlSupport/StdDeque.h | 126 - .../Eigen/src/StlSupport/StdList.h | 106 - .../Eigen/src/StlSupport/StdVector.h | 126 - .../Eigen/src/StlSupport/details.h | 84 - .../Eigen/src/SuperLUSupport/CMakeLists.txt | 6 - 
.../Eigen/src/SuperLUSupport/SuperLUSupport.h | 1026 - .../Eigen/src/UmfPackSupport/CMakeLists.txt | 6 - .../Eigen/src/UmfPackSupport/UmfPackSupport.h | 475 - .../Eigen/src/misc/CMakeLists.txt | 6 - .../eigen-3.2.10/Eigen/src/misc/SparseSolve.h | 128 - thirdparty/eigen-3.2.10/Eigen/src/misc/blas.h | 658 - .../Eigen/src/plugins/CMakeLists.txt | 6 - thirdparty/eigen/.hgeol | 11 + thirdparty/eigen/.hgignore | 34 + thirdparty/eigen/.hgtags | 29 + thirdparty/eigen/CMakeLists.txt | 525 + thirdparty/eigen/COPYING.BSD | 26 + thirdparty/eigen/COPYING.GPL | 674 + thirdparty/eigen/COPYING.LGPL | 502 + thirdparty/eigen/COPYING.MINPACK | 52 + thirdparty/eigen/COPYING.MPL2 | 373 + thirdparty/eigen/COPYING.README | 18 + thirdparty/eigen/CTestConfig.cmake | 17 + thirdparty/eigen/CTestCustom.cmake.in | 3 + thirdparty/eigen/Eigen/CMakeLists.txt | 19 + thirdparty/eigen/Eigen/Cholesky | 41 + thirdparty/eigen/Eigen/CholmodSupport | 48 + thirdparty/eigen/Eigen/Core | 519 + thirdparty/eigen/Eigen/Dense | 7 + thirdparty/eigen/Eigen/Eigen | 2 + thirdparty/eigen/Eigen/Eigenvalues | 57 + thirdparty/eigen/Eigen/Geometry | 62 + thirdparty/eigen/Eigen/Householder | 30 + thirdparty/eigen/Eigen/IterativeLinearSolvers | 48 + thirdparty/eigen/Eigen/Jacobi | 33 + thirdparty/eigen/Eigen/LU | 46 + thirdparty/eigen/Eigen/MetisSupport | 35 + thirdparty/eigen/Eigen/OrderingMethods | 73 + thirdparty/eigen/Eigen/PaStiXSupport | 48 + thirdparty/eigen/Eigen/PardisoSupport | 35 + thirdparty/eigen/Eigen/QR | 47 + thirdparty/eigen/Eigen/QtAlignedMalloc | 40 + thirdparty/eigen/Eigen/SPQRSupport | 34 + thirdparty/eigen/Eigen/SVD | 47 + thirdparty/eigen/Eigen/Sparse | 34 + thirdparty/eigen/Eigen/SparseCholesky | 45 + thirdparty/eigen/Eigen/SparseCore | 69 + thirdparty/eigen/Eigen/SparseLU | 46 + thirdparty/eigen/Eigen/SparseQR | 37 + thirdparty/eigen/Eigen/StdDeque | 27 + thirdparty/eigen/Eigen/StdList | 26 + thirdparty/eigen/Eigen/StdVector | 27 + thirdparty/eigen/Eigen/SuperLUSupport | 64 + 
thirdparty/eigen/Eigen/UmfPackSupport | 40 + thirdparty/eigen/Eigen/src/Cholesky/LDLT.h | 669 + thirdparty/eigen/Eigen/src/Cholesky/LLT.h | 534 + .../eigen/Eigen/src/Cholesky/LLT_LAPACKE.h | 99 + .../Eigen/src/CholmodSupport/CholmodSupport.h | 639 + thirdparty/eigen/Eigen/src/Core/Array.h | 325 + thirdparty/eigen/Eigen/src/Core/ArrayBase.h | 226 + .../eigen/Eigen/src/Core/ArrayWrapper.h | 207 + thirdparty/eigen/Eigen/src/Core/Assign.h | 90 + .../eigen/Eigen/src/Core/AssignEvaluator.h | 913 + thirdparty/eigen/Eigen/src/Core/Assign_MKL.h | 176 + thirdparty/eigen/Eigen/src/Core/BandMatrix.h | 353 + thirdparty/eigen/Eigen/src/Core/Block.h | 452 + .../eigen/Eigen/src/Core/BooleanRedux.h | 164 + .../eigen/Eigen/src/Core/CommaInitializer.h | 160 + .../eigen/Eigen/src/Core/ConditionEstimator.h | 175 + .../eigen/Eigen/src/Core/CoreEvaluators.h | 1673 ++ .../eigen/Eigen/src/Core/CoreIterators.h | 127 + .../eigen/Eigen/src/Core/CwiseBinaryOp.h | 184 + .../eigen/Eigen/src/Core/CwiseNullaryOp.h | 866 + .../eigen/Eigen/src/Core/CwiseTernaryOp.h | 197 + .../eigen/Eigen/src/Core/CwiseUnaryOp.h | 103 + .../eigen/Eigen/src/Core/CwiseUnaryView.h | 128 + thirdparty/eigen/Eigen/src/Core/DenseBase.h | 601 + .../eigen/Eigen/src/Core/DenseCoeffsBase.h | 681 + .../eigen/Eigen/src/Core/DenseStorage.h | 563 + thirdparty/eigen/Eigen/src/Core/Diagonal.h | 257 + .../eigen/Eigen/src/Core/DiagonalMatrix.h | 343 + .../eigen/Eigen/src/Core/DiagonalProduct.h | 28 + thirdparty/eigen/Eigen/src/Core/Dot.h | 312 + thirdparty/eigen/Eigen/src/Core/EigenBase.h | 155 + .../eigen/Eigen/src/Core/ForceAlignedAccess.h | 146 + thirdparty/eigen/Eigen/src/Core/Fuzzy.h | 155 + .../eigen/Eigen/src/Core/GeneralProduct.h | 436 + .../eigen/Eigen/src/Core/GenericPacketMath.h | 593 + .../eigen/Eigen/src/Core/GlobalFunctions.h | 187 + thirdparty/eigen/Eigen/src/Core/IO.h | 239 + thirdparty/eigen/Eigen/src/Core/Inverse.h | 117 + thirdparty/eigen/Eigen/src/Core/Map.h | 164 + thirdparty/eigen/Eigen/src/Core/MapBase.h | 299 + 
.../eigen/Eigen/src/Core/MathFunctions.h | 1521 ++ .../eigen/Eigen/src/Core/MathFunctionsImpl.h | 78 + thirdparty/eigen/Eigen/src/Core/Matrix.h | 461 + thirdparty/eigen/Eigen/src/Core/MatrixBase.h | 530 + thirdparty/eigen/Eigen/src/Core/NestByValue.h | 110 + thirdparty/eigen/Eigen/src/Core/NoAlias.h | 108 + thirdparty/eigen/Eigen/src/Core/NumTraits.h | 246 + .../eigen/Eigen/src/Core/PermutationMatrix.h | 633 + .../eigen/Eigen/src/Core/PlainObjectBase.h | 1015 + thirdparty/eigen/Eigen/src/Core/Product.h | 186 + .../eigen/Eigen/src/Core/ProductEvaluators.h | 1101 ++ thirdparty/eigen/Eigen/src/Core/Random.h | 182 + thirdparty/eigen/Eigen/src/Core/Redux.h | 505 + thirdparty/eigen/Eigen/src/Core/Ref.h | 281 + thirdparty/eigen/Eigen/src/Core/Replicate.h | 142 + .../eigen/Eigen/src/Core/ReturnByValue.h | 117 + thirdparty/eigen/Eigen/src/Core/Reverse.h | 211 + thirdparty/eigen/Eigen/src/Core/Select.h | 162 + .../eigen/Eigen/src/Core/SelfAdjointView.h | 308 + .../eigen/Eigen/src/Core/SelfCwiseBinaryOp.h | 51 + thirdparty/eigen/Eigen/src/Core/Solve.h | 188 + .../eigen/Eigen/src/Core/SolveTriangular.h | 230 + thirdparty/eigen/Eigen/src/Core/SolverBase.h | 130 + thirdparty/eigen/Eigen/src/Core/StableNorm.h | 220 + thirdparty/eigen/Eigen/src/Core/Stride.h | 111 + thirdparty/eigen/Eigen/src/Core/Swap.h | 67 + thirdparty/eigen/Eigen/src/Core/Transpose.h | 403 + .../eigen/Eigen/src/Core/Transpositions.h | 407 + .../eigen/Eigen/src/Core/TriangularMatrix.h | 978 + thirdparty/eigen/Eigen/src/Core/VectorBlock.h | 96 + .../eigen/Eigen/src/Core/VectorwiseOp.h | 695 + thirdparty/eigen/Eigen/src/Core/Visitor.h | 271 + .../eigen/Eigen/src/Core/arch/AVX/Complex.h | 483 + .../Eigen/src/Core/arch/AVX/MathFunctions.h | 439 + .../Eigen/src/Core/arch/AVX/PacketMath.h | 636 + .../Eigen/src/Core/arch/AVX/TypeCasting.h | 51 + .../src/Core/arch/AVX512/MathFunctions.h | 396 + .../Eigen/src/Core/arch/AVX512/PacketMath.h | 1316 ++ .../Eigen/src/Core/arch/AltiVec/Complex.h | 461 + 
.../src/Core/arch/AltiVec/MathFunctions.h | 320 + .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 1033 + .../eigen/Eigen/src/Core/arch/CUDA/Complex.h | 103 + .../eigen/Eigen/src/Core/arch/CUDA/Half.h | 601 + .../Eigen/src/Core/arch/CUDA/MathFunctions.h | 91 + .../Eigen/src/Core/arch/CUDA/PacketMath.h | 333 + .../Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 1123 ++ .../Eigen/src/Core/arch/CUDA/TypeCasting.h | 212 + .../Eigen/src/Core/arch/Default/Settings.h | 49 + .../eigen/Eigen/src/Core/arch/NEON/Complex.h | 486 + .../Eigen/src/Core/arch/NEON/MathFunctions.h | 91 + .../Eigen/src/Core/arch/NEON/PacketMath.h | 724 + .../eigen/Eigen/src/Core/arch/SSE/Complex.h | 503 + .../Eigen/src/Core/arch/SSE/MathFunctions.h | 562 + .../Eigen/src/Core/arch/SSE/PacketMath.h | 873 + .../Eigen/src/Core/arch/SSE/TypeCasting.h | 77 + .../Eigen/src/Core/arch/ZVector/Complex.h | 394 + .../src/Core/arch/ZVector/MathFunctions.h | 137 + .../Eigen/src/Core/arch/ZVector/PacketMath.h | 945 + .../src/Core/functors/AssignmentFunctors.h | 168 + .../Eigen/src/Core/functors/BinaryFunctors.h | 482 + .../Eigen/src/Core/functors/NullaryFunctors.h | 182 + .../Eigen/src/Core/functors/StlFunctors.h | 132 + .../Eigen/src/Core/functors/TernaryFunctors.h | 25 + .../Eigen/src/Core/functors/UnaryFunctors.h | 810 + .../Core/products/GeneralBlockPanelKernel.h | 2149 ++ .../src/Core/products/GeneralMatrixMatrix.h | 492 + .../products/GeneralMatrixMatrixTriangular.h | 300 + .../GeneralMatrixMatrixTriangular_BLAS.h | 141 + .../Core/products/GeneralMatrixMatrix_BLAS.h | 115 + .../src/Core/products/GeneralMatrixVector.h | 619 + .../Core/products/GeneralMatrixVector_BLAS.h | 129 + .../Eigen/src/Core/products/Parallelizer.h | 162 + .../Core/products/SelfadjointMatrixMatrix.h | 521 + .../products/SelfadjointMatrixMatrix_BLAS.h | 275 + .../Core/products/SelfadjointMatrixVector.h | 260 + .../products/SelfadjointMatrixVector_BLAS.h | 111 + .../src/Core/products/SelfadjointProduct.h | 133 + 
.../Core/products/SelfadjointRank2Update.h | 93 + .../Core/products/TriangularMatrixMatrix.h | 441 + .../products/TriangularMatrixMatrix_BLAS.h | 302 + .../Core/products/TriangularMatrixVector.h | 336 + .../products/TriangularMatrixVector_BLAS.h | 241 + .../Core/products/TriangularSolverMatrix.h | 334 + .../products/TriangularSolverMatrix_BLAS.h | 151 + .../Core/products/TriangularSolverVector.h | 145 + .../eigen/Eigen/src/Core/util/BlasUtil.h | 398 + .../eigen/Eigen/src/Core/util/Constants.h | 547 + .../src/Core/util/DisableStupidWarnings.h | 75 + .../Eigen/src/Core/util/ForwardDeclarations.h | 302 + .../eigen/Eigen/src/Core/util/MKL_support.h | 128 + thirdparty/eigen/Eigen/src/Core/util/Macros.h | 993 + thirdparty/eigen/Eigen/src/Core/util/Memory.h | 977 + thirdparty/eigen/Eigen/src/Core/util/Meta.h | 492 + .../eigen/Eigen/src/Core/util/NonMPL2.h | 3 + .../src/Core/util/ReenableStupidWarnings.h | 27 + .../eigen/Eigen/src/Core/util/StaticAssert.h | 216 + .../eigen/Eigen/src/Core/util/XprHelper.h | 816 + .../src/Eigenvalues/ComplexEigenSolver.h | 344 + .../Eigen/src/Eigenvalues/ComplexSchur.h | 459 + .../src/Eigenvalues/ComplexSchur_LAPACKE.h | 91 + .../eigen/Eigen/src/Eigenvalues/EigenSolver.h | 622 + .../src/Eigenvalues/GeneralizedEigenSolver.h | 419 + .../GeneralizedSelfAdjointEigenSolver.h | 226 + .../src/Eigenvalues/HessenbergDecomposition.h | 374 + .../src/Eigenvalues/MatrixBaseEigenvalues.h | 160 + .../eigen/Eigen/src/Eigenvalues/RealQZ.h | 654 + .../eigen/Eigen/src/Eigenvalues/RealSchur.h | 534 + .../Eigen/src/Eigenvalues/RealSchur_LAPACKE.h | 77 + .../src/Eigenvalues/SelfAdjointEigenSolver.h | 869 + .../SelfAdjointEigenSolver_LAPACKE.h | 90 + .../src/Eigenvalues/Tridiagonalization.h | 556 + .../eigen/Eigen/src/Geometry/AlignedBox.h | 392 + .../eigen/Eigen/src/Geometry/AngleAxis.h | 247 + .../eigen/Eigen/src/Geometry/EulerAngles.h | 114 + .../eigen/Eigen/src/Geometry/Homogeneous.h | 497 + .../eigen/Eigen/src/Geometry/Hyperplane.h | 279 + 
.../eigen/Eigen/src/Geometry/OrthoMethods.h | 234 + .../Eigen/src/Geometry/ParametrizedLine.h | 195 + .../eigen/Eigen/src/Geometry/Quaternion.h | 811 + .../eigen/Eigen/src/Geometry/Rotation2D.h | 199 + .../eigen/Eigen/src/Geometry/RotationBase.h | 206 + thirdparty/eigen/Eigen/src/Geometry/Scaling.h | 170 + .../eigen/Eigen/src/Geometry/Transform.h | 1542 ++ .../eigen/Eigen/src/Geometry/Translation.h | 208 + thirdparty/eigen/Eigen/src/Geometry/Umeyama.h | 166 + .../Eigen/src/Geometry/arch/Geometry_SSE.h | 141 + .../Eigen/src/Householder/BlockHouseholder.h | 102 + .../eigen/Eigen/src/Householder/Householder.h | 172 + .../src/Householder/HouseholderSequence.h | 470 + .../BasicPreconditioners.h | 211 + .../src/IterativeLinearSolvers/BiCGSTAB.h | 228 + .../ConjugateGradient.h | 245 + .../IncompleteCholesky.h | 400 + .../IterativeLinearSolvers/IncompleteLUT.h | 462 + .../IterativeSolverBase.h | 394 + .../LeastSquareConjugateGradient.h | 216 + .../IterativeLinearSolvers/SolveWithGuess.h | 115 + thirdparty/eigen/Eigen/src/Jacobi/Jacobi.h | 433 + thirdparty/eigen/Eigen/src/LU/Determinant.h | 101 + thirdparty/eigen/Eigen/src/LU/FullPivLU.h | 891 + thirdparty/eigen/Eigen/src/LU/InverseImpl.h | 415 + thirdparty/eigen/Eigen/src/LU/PartialPivLU.h | 611 + .../eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h | 83 + .../eigen/Eigen/src/LU/arch/Inverse_SSE.h | 338 + .../Eigen/src/MetisSupport/MetisSupport.h | 137 + .../eigen/Eigen/src/OrderingMethods/Amd.h | 445 + .../Eigen/src/OrderingMethods/Eigen_Colamd.h | 1843 ++ .../Eigen/src/OrderingMethods/Ordering.h | 157 + .../Eigen/src/PaStiXSupport/PaStiXSupport.h | 678 + .../Eigen/src/PardisoSupport/PardisoSupport.h | 543 + .../eigen/Eigen/src/QR/ColPivHouseholderQR.h | 653 + .../src/QR/ColPivHouseholderQR_LAPACKE.h | 97 + .../src/QR/CompleteOrthogonalDecomposition.h | 562 + .../eigen/Eigen/src/QR/FullPivHouseholderQR.h | 676 + thirdparty/eigen/Eigen/src/QR/HouseholderQR.h | 409 + .../Eigen/src/QR/HouseholderQR_LAPACKE.h | 68 + 
.../src/SPQRSupport/SuiteSparseQRSupport.h | 313 + thirdparty/eigen/Eigen/src/SVD/BDCSVD.h | 1230 ++ thirdparty/eigen/Eigen/src/SVD/JacobiSVD.h | 800 + .../eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h | 90 + thirdparty/eigen/Eigen/src/SVD/SVDBase.h | 313 + .../Eigen/src/SVD/UpperBidiagonalization.h | 412 + .../src/SparseCholesky/SimplicialCholesky.h | 689 + .../SparseCholesky/SimplicialCholesky_impl.h | 199 + .../eigen/Eigen/src/SparseCore/AmbiVector.h | 377 + .../Eigen/src/SparseCore/CompressedStorage.h | 258 + .../ConservativeSparseSparseProduct.h | 345 + .../Eigen/src/SparseCore/MappedSparseMatrix.h | 67 + .../eigen/Eigen/src/SparseCore/SparseAssign.h | 219 + .../eigen/Eigen/src/SparseCore/SparseBlock.h | 603 + .../Eigen/src/SparseCore/SparseColEtree.h | 206 + .../src/SparseCore/SparseCompressedBase.h | 326 + .../src/SparseCore/SparseCwiseBinaryOp.h | 700 + .../Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 198 + .../Eigen/src/SparseCore/SparseDenseProduct.h | 320 + .../src/SparseCore/SparseDiagonalProduct.h | 134 + .../eigen/Eigen/src/SparseCore/SparseDot.h | 98 + .../eigen/Eigen/src/SparseCore/SparseFuzzy.h | 29 + .../eigen/Eigen/src/SparseCore/SparseMap.h | 305 + .../eigen/Eigen/src/SparseCore/SparseMatrix.h | 1391 ++ .../Eigen/src/SparseCore/SparseMatrixBase.h | 396 + .../Eigen/src/SparseCore/SparsePermutation.h | 178 + .../Eigen/src/SparseCore/SparseProduct.h | 169 + .../eigen/Eigen/src/SparseCore/SparseRedux.h | 49 + .../eigen/Eigen/src/SparseCore/SparseRef.h | 378 + .../src/SparseCore/SparseSelfAdjointView.h | 626 + .../Eigen/src/SparseCore/SparseSolverBase.h | 124 + .../SparseSparseProductWithPruning.h | 198 + .../Eigen/src/SparseCore/SparseTranspose.h | 104 + .../src/SparseCore/SparseTriangularView.h | 186 + .../eigen/Eigen/src/SparseCore/SparseUtil.h | 178 + .../eigen/Eigen/src/SparseCore/SparseVector.h | 478 + .../eigen/Eigen/src/SparseCore/SparseView.h | 222 + .../Eigen/src/SparseCore/TriangularSolver.h | 310 + .../eigen/Eigen/src/SparseLU/SparseLU.h | 775 + 
.../eigen/Eigen/src/SparseLU/SparseLUImpl.h | 66 + .../Eigen/src/SparseLU/SparseLU_Memory.h | 226 + .../Eigen/src/SparseLU/SparseLU_Structs.h | 110 + .../src/SparseLU/SparseLU_SupernodalMatrix.h | 301 + .../eigen/Eigen/src/SparseLU/SparseLU_Utils.h | 80 + .../Eigen/src/SparseLU/SparseLU_column_bmod.h | 181 + .../Eigen/src/SparseLU/SparseLU_column_dfs.h | 179 + .../src/SparseLU/SparseLU_copy_to_ucol.h | 107 + .../Eigen/src/SparseLU/SparseLU_gemm_kernel.h | 279 + .../src/SparseLU/SparseLU_heap_relax_snode.h | 126 + .../Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 130 + .../Eigen/src/SparseLU/SparseLU_panel_bmod.h | 223 + .../Eigen/src/SparseLU/SparseLU_panel_dfs.h | 258 + .../Eigen/src/SparseLU/SparseLU_pivotL.h | 137 + .../Eigen/src/SparseLU/SparseLU_pruneL.h | 136 + .../Eigen/src/SparseLU/SparseLU_relax_snode.h | 83 + .../eigen/Eigen/src/SparseQR/SparseQR.h | 739 + .../eigen/Eigen/src/StlSupport/StdDeque.h | 126 + .../eigen/Eigen/src/StlSupport/StdList.h | 106 + .../eigen/Eigen/src/StlSupport/StdVector.h | 131 + .../eigen/Eigen/src/StlSupport/details.h | 84 + .../Eigen/src/SuperLUSupport/SuperLUSupport.h | 1024 + .../Eigen/src/UmfPackSupport/UmfPackSupport.h | 457 + thirdparty/eigen/Eigen/src/misc/Image.h | 82 + thirdparty/eigen/Eigen/src/misc/Kernel.h | 79 + thirdparty/eigen/Eigen/src/misc/RealSvd2x2.h | 55 + thirdparty/eigen/Eigen/src/misc/blas.h | 440 + thirdparty/eigen/Eigen/src/misc/lapack.h | 152 + thirdparty/eigen/Eigen/src/misc/lapacke.h | 16291 ++++++++++++++++ .../eigen/Eigen/src/misc/lapacke_mangling.h | 17 + .../Eigen/src/plugins/ArrayCwiseBinaryOps.h | 332 + .../Eigen/src/plugins/ArrayCwiseUnaryOps.h | 552 + .../eigen/Eigen/src/plugins/BlockMethods.h | 1058 + .../Eigen/src/plugins/CommonCwiseBinaryOps.h | 115 + .../Eigen/src/plugins/CommonCwiseUnaryOps.h | 163 + .../Eigen/src/plugins/MatrixCwiseBinaryOps.h | 152 + .../Eigen/src/plugins/MatrixCwiseUnaryOps.h | 85 + thirdparty/eigen/INSTALL | 35 + thirdparty/eigen/README.md | 3 + 
thirdparty/eigen/bench/BenchSparseUtil.h | 149 + thirdparty/eigen/bench/BenchTimer.h | 195 + thirdparty/eigen/bench/BenchUtil.h | 92 + thirdparty/eigen/bench/README.txt | 55 + .../eigen/bench/analyze-blocking-sizes.cpp | 876 + thirdparty/eigen/bench/basicbench.cxxlist | 28 + thirdparty/eigen/bench/basicbenchmark.cpp | 35 + thirdparty/eigen/bench/basicbenchmark.h | 63 + thirdparty/eigen/bench/benchBlasGemm.cpp | 219 + thirdparty/eigen/bench/benchCholesky.cpp | 142 + thirdparty/eigen/bench/benchEigenSolver.cpp | 212 + thirdparty/eigen/bench/benchFFT.cpp | 115 + thirdparty/eigen/bench/benchGeometry.cpp | 134 + thirdparty/eigen/bench/benchVecAdd.cpp | 135 + thirdparty/eigen/bench/bench_gemm.cpp | 340 + .../eigen/bench/bench_multi_compilers.sh | 28 + thirdparty/eigen/bench/bench_norm.cpp | 360 + thirdparty/eigen/bench/bench_reverse.cpp | 84 + thirdparty/eigen/bench/bench_sum.cpp | 18 + thirdparty/eigen/bench/bench_unrolling | 12 + .../eigen/bench/benchmark-blocking-sizes.cpp | 677 + thirdparty/eigen/bench/benchmark.cpp | 39 + thirdparty/eigen/bench/benchmarkSlice.cpp | 38 + thirdparty/eigen/bench/benchmarkX.cpp | 36 + thirdparty/eigen/bench/benchmarkXcwise.cpp | 35 + thirdparty/eigen/bench/benchmark_suite | 18 + thirdparty/eigen/bench/btl/CMakeLists.txt | 107 + thirdparty/eigen/bench/btl/COPYING | 340 + thirdparty/eigen/bench/btl/README | 154 + .../bench/btl/actions/action_aat_product.hh | 145 + .../bench/btl/actions/action_ata_product.hh | 145 + .../bench/btl/actions/action_atv_product.hh | 134 + .../eigen/bench/btl/actions/action_axpby.hh | 127 + .../eigen/bench/btl/actions/action_axpy.hh | 139 + .../bench/btl/actions/action_cholesky.hh | 128 + .../eigen/bench/btl/actions/action_ger.hh | 128 + .../bench/btl/actions/action_hessenberg.hh | 233 + .../bench/btl/actions/action_lu_decomp.hh | 124 + .../bench/btl/actions/action_lu_solve.hh | 136 + .../actions/action_matrix_matrix_product.hh | 150 + .../action_matrix_matrix_product_bis.hh | 152 + 
.../actions/action_matrix_vector_product.hh | 153 + .../bench/btl/actions/action_partial_lu.hh | 125 + .../eigen/bench/btl/actions/action_rot.hh | 116 + .../eigen/bench/btl/actions/action_symv.hh | 139 + .../eigen/bench/btl/actions/action_syr2.hh | 133 + .../bench/btl/actions/action_trisolve.hh | 137 + .../btl/actions/action_trisolve_matrix.hh | 165 + .../eigen/bench/btl/actions/action_trmm.hh | 165 + .../eigen/bench/btl/actions/basic_actions.hh | 21 + .../eigen/bench/btl/cmake/FindACML.cmake | 51 + .../eigen/bench/btl/cmake/FindATLAS.cmake | 31 + .../eigen/bench/btl/cmake/FindBLAZE.cmake | 31 + .../eigen/bench/btl/cmake/FindBlitz.cmake | 40 + .../eigen/bench/btl/cmake/FindCBLAS.cmake | 35 + .../eigen/bench/btl/cmake/FindGMM.cmake | 17 + .../eigen/bench/btl/cmake/FindMKL.cmake | 65 + .../eigen/bench/btl/cmake/FindMTL4.cmake | 31 + .../eigen/bench/btl/cmake/FindOPENBLAS.cmake | 17 + .../cmake/FindPackageHandleStandardArgs.cmake | 60 + .../eigen/bench/btl/cmake/FindTvmet.cmake | 32 + .../cmake/MacroOptionalAddSubdirectory.cmake | 31 + .../eigen/bench/btl/data/CMakeLists.txt | 32 + .../eigen/bench/btl/data/action_settings.txt | 19 + .../bench/btl/data/gnuplot_common_settings.hh | 87 + thirdparty/eigen/bench/btl/data/go_mean | 58 + thirdparty/eigen/bench/btl/data/mean.cxx | 182 + .../eigen/bench/btl/data/mk_gnuplot_script.sh | 68 + .../eigen/bench/btl/data/mk_mean_script.sh | 52 + .../eigen/bench/btl/data/mk_new_gnuplot.sh | 54 + .../bench/btl/data/perlib_plot_settings.txt | 16 + .../eigen/bench/btl/data/regularize.cxx | 131 + thirdparty/eigen/bench/btl/data/smooth.cxx | 198 + thirdparty/eigen/bench/btl/data/smooth_all.sh | 68 + .../eigen/bench/btl/generic_bench/bench.hh | 168 + .../btl/generic_bench/bench_parameter.hh | 53 + .../eigen/bench/btl/generic_bench/btl.hh | 242 + .../btl/generic_bench/init/init_function.hh | 54 + .../btl/generic_bench/init/init_matrix.hh | 64 + .../btl/generic_bench/init/init_vector.hh | 37 + .../btl/generic_bench/static/bench_static.hh | 80 
+ .../static/intel_bench_fixed_size.hh | 66 + .../static/static_size_generator.hh | 57 + .../generic_bench/timers/STL_perf_analyzer.hh | 82 + .../btl/generic_bench/timers/STL_timer.hh | 78 + .../timers/mixed_perf_analyzer.hh | 73 + .../timers/portable_perf_analyzer.hh | 103 + .../timers/portable_perf_analyzer_old.hh | 134 + .../generic_bench/timers/portable_timer.hh | 187 + .../generic_bench/timers/x86_perf_analyzer.hh | 108 + .../btl/generic_bench/timers/x86_timer.hh | 246 + .../btl/generic_bench/utils/size_lin_log.hh | 70 + .../bench/btl/generic_bench/utils/size_log.hh | 54 + .../bench/btl/generic_bench/utils/utilities.h | 90 + .../bench/btl/generic_bench/utils/xy_file.hh | 75 + .../eigen/bench/btl/libs/BLAS/CMakeLists.txt | 47 + thirdparty/eigen/bench/btl/libs/BLAS/blas.h | 675 + .../bench/btl/libs/BLAS/blas_interface.hh | 83 + .../btl/libs/BLAS/blas_interface_impl.hh | 147 + .../bench/btl/libs/BLAS/c_interface_base.h | 73 + thirdparty/eigen/bench/btl/libs/BLAS/main.cpp | 73 + .../eigen/bench/btl/libs/STL/CMakeLists.txt | 2 + .../eigen/bench/btl/libs/STL/STL_interface.hh | 244 + thirdparty/eigen/bench/btl/libs/STL/main.cpp | 42 + .../eigen/bench/btl/libs/blaze/CMakeLists.txt | 13 + .../bench/btl/libs/blaze/blaze_interface.hh | 140 + .../eigen/bench/btl/libs/blaze/main.cpp | 40 + .../eigen/bench/btl/libs/blitz/CMakeLists.txt | 17 + .../libs/blitz/blitz_LU_solve_interface.hh | 192 + .../bench/btl/libs/blitz/blitz_interface.hh | 147 + .../eigen/bench/btl/libs/blitz/btl_blitz.cpp | 51 + .../bench/btl/libs/blitz/btl_tiny_blitz.cpp | 38 + .../btl/libs/blitz/tiny_blitz_interface.hh | 106 + .../bench/btl/libs/eigen2/CMakeLists.txt | 19 + .../bench/btl/libs/eigen2/btl_tiny_eigen2.cpp | 46 + .../bench/btl/libs/eigen2/eigen2_interface.hh | 168 + .../eigen/bench/btl/libs/eigen2/main_adv.cpp | 44 + .../bench/btl/libs/eigen2/main_linear.cpp | 34 + .../bench/btl/libs/eigen2/main_matmat.cpp | 35 + .../bench/btl/libs/eigen2/main_vecmat.cpp | 36 + 
.../bench/btl/libs/eigen3/CMakeLists.txt | 65 + .../bench/btl/libs/eigen3/btl_tiny_eigen3.cpp | 46 + .../bench/btl/libs/eigen3/eigen3_interface.hh | 240 + .../eigen/bench/btl/libs/eigen3/main_adv.cpp | 44 + .../bench/btl/libs/eigen3/main_linear.cpp | 35 + .../bench/btl/libs/eigen3/main_matmat.cpp | 35 + .../bench/btl/libs/eigen3/main_vecmat.cpp | 36 + .../eigen/bench/btl/libs/gmm/CMakeLists.txt | 6 + .../btl/libs/gmm/gmm_LU_solve_interface.hh | 192 + .../eigen/bench/btl/libs/gmm/gmm_interface.hh | 144 + thirdparty/eigen/bench/btl/libs/gmm/main.cpp | 51 + .../eigen/bench/btl/libs/mtl4/.kdbgrc.main | 12 + .../eigen/bench/btl/libs/mtl4/CMakeLists.txt | 6 + thirdparty/eigen/bench/btl/libs/mtl4/main.cpp | 46 + .../btl/libs/mtl4/mtl4_LU_solve_interface.hh | 192 + .../bench/btl/libs/mtl4/mtl4_interface.hh | 144 + .../bench/btl/libs/tensors/CMakeLists.txt | 44 + .../bench/btl/libs/tensors/main_linear.cpp | 23 + .../bench/btl/libs/tensors/main_matmat.cpp | 21 + .../bench/btl/libs/tensors/main_vecmat.cpp | 21 + .../btl/libs/tensors/tensor_interface.hh | 105 + .../eigen/bench/btl/libs/tvmet/CMakeLists.txt | 6 + .../eigen/bench/btl/libs/tvmet/main.cpp | 40 + .../bench/btl/libs/tvmet/tvmet_interface.hh | 104 + .../eigen/bench/btl/libs/ublas/CMakeLists.txt | 7 + .../eigen/bench/btl/libs/ublas/main.cpp | 44 + .../bench/btl/libs/ublas/ublas_interface.hh | 141 + .../eigen/bench/check_cache_queries.cpp | 101 + thirdparty/eigen/bench/dense_solvers.cpp | 186 + thirdparty/eigen/bench/eig33.cpp | 195 + thirdparty/eigen/bench/geometry.cpp | 126 + .../bench/perf_monitoring/gemm/changesets.txt | 61 + .../eigen/bench/perf_monitoring/gemm/gemm.cpp | 67 + .../perf_monitoring/gemm/gemm_settings.txt | 15 + .../bench/perf_monitoring/gemm/lazy_gemm.cpp | 98 + .../gemm/lazy_gemm_settings.txt | 15 + .../bench/perf_monitoring/gemm/make_plot.sh | 38 + .../eigen/bench/perf_monitoring/gemm/run.sh | 156 + thirdparty/eigen/bench/product_threshold.cpp | 143 + thirdparty/eigen/bench/quat_slerp.cpp | 247 + 
thirdparty/eigen/bench/quatmul.cpp | 47 + thirdparty/eigen/bench/sparse_cholesky.cpp | 216 + .../eigen/bench/sparse_dense_product.cpp | 187 + thirdparty/eigen/bench/sparse_lu.cpp | 132 + thirdparty/eigen/bench/sparse_product.cpp | 323 + .../eigen/bench/sparse_randomsetter.cpp | 125 + thirdparty/eigen/bench/sparse_setter.cpp | 485 + thirdparty/eigen/bench/sparse_transpose.cpp | 104 + thirdparty/eigen/bench/sparse_trisolver.cpp | 220 + thirdparty/eigen/bench/spbench/CMakeLists.txt | 78 + thirdparty/eigen/bench/spbench/sp_solver.cpp | 125 + thirdparty/eigen/bench/spbench/spbench.dtd | 31 + .../eigen/bench/spbench/spbenchsolver.cpp | 87 + .../eigen/bench/spbench/spbenchsolver.h | 554 + thirdparty/eigen/bench/spbench/spbenchstyle.h | 95 + .../eigen/bench/spbench/test_sparseLU.cpp | 93 + thirdparty/eigen/bench/spmv.cpp | 233 + thirdparty/eigen/bench/tensors/README | 21 + thirdparty/eigen/bench/tensors/benchmark.h | 49 + .../eigen/bench/tensors/benchmark_main.cc | 237 + .../tensors/contraction_benchmarks_cpu.cc | 39 + .../eigen/bench/tensors/tensor_benchmarks.h | 478 + .../bench/tensors/tensor_benchmarks_cpu.cc | 168 + .../tensors/tensor_benchmarks_fp16_gpu.cu | 77 + .../bench/tensors/tensor_benchmarks_gpu.cu | 75 + .../bench/tensors/tensor_benchmarks_sycl.cc | 37 + thirdparty/eigen/bench/vdw_new.cpp | 56 + thirdparty/eigen/blas/BandTriangularSolver.h | 97 + thirdparty/eigen/blas/CMakeLists.txt | 55 + thirdparty/eigen/blas/GeneralRank1Update.h | 44 + .../eigen/blas/PackedSelfadjointProduct.h | 53 + .../eigen/blas/PackedTriangularMatrixVector.h | 79 + .../eigen/blas/PackedTriangularSolverVector.h | 88 + thirdparty/eigen/blas/README.txt | 6 + thirdparty/eigen/blas/Rank2Update.h | 57 + thirdparty/eigen/blas/common.h | 163 + thirdparty/eigen/blas/complex_double.cpp | 20 + thirdparty/eigen/blas/complex_single.cpp | 20 + thirdparty/eigen/blas/double.cpp | 32 + thirdparty/eigen/blas/f2c/chbmv.c | 487 + thirdparty/eigen/blas/f2c/chpmv.c | 438 + 
thirdparty/eigen/blas/f2c/complexdots.c | 84 + thirdparty/eigen/blas/f2c/ctbmv.c | 647 + thirdparty/eigen/blas/f2c/d_cnjg.c | 6 + thirdparty/eigen/blas/f2c/datatypes.h | 24 + thirdparty/eigen/blas/f2c/drotm.c | 215 + thirdparty/eigen/blas/f2c/drotmg.c | 293 + thirdparty/eigen/blas/f2c/dsbmv.c | 366 + thirdparty/eigen/blas/f2c/dspmv.c | 316 + thirdparty/eigen/blas/f2c/dtbmv.c | 428 + thirdparty/eigen/blas/f2c/lsame.c | 117 + thirdparty/eigen/blas/f2c/r_cnjg.c | 6 + thirdparty/eigen/blas/f2c/srotm.c | 216 + thirdparty/eigen/blas/f2c/srotmg.c | 295 + thirdparty/eigen/blas/f2c/ssbmv.c | 368 + thirdparty/eigen/blas/f2c/sspmv.c | 316 + thirdparty/eigen/blas/f2c/stbmv.c | 428 + thirdparty/eigen/blas/f2c/zhbmv.c | 488 + thirdparty/eigen/blas/f2c/zhpmv.c | 438 + thirdparty/eigen/blas/f2c/ztbmv.c | 647 + thirdparty/eigen/blas/fortran/complexdots.f | 43 + thirdparty/eigen/blas/level1_cplx_impl.h | 133 + thirdparty/eigen/blas/level1_impl.h | 166 + thirdparty/eigen/blas/level1_real_impl.h | 100 + thirdparty/eigen/blas/level2_cplx_impl.h | 360 + thirdparty/eigen/blas/level2_impl.h | 553 + thirdparty/eigen/blas/level2_real_impl.h | 306 + thirdparty/eigen/blas/level3_impl.h | 702 + thirdparty/eigen/blas/single.cpp | 22 + thirdparty/eigen/blas/testing/CMakeLists.txt | 40 + thirdparty/eigen/blas/testing/cblat1.f | 724 + thirdparty/eigen/blas/testing/cblat2.dat | 35 + thirdparty/eigen/blas/testing/cblat2.f | 3279 ++++ thirdparty/eigen/blas/testing/cblat3.dat | 23 + thirdparty/eigen/blas/testing/cblat3.f | 3492 ++++ thirdparty/eigen/blas/testing/dblat1.f | 1065 + thirdparty/eigen/blas/testing/dblat2.dat | 34 + thirdparty/eigen/blas/testing/dblat2.f | 3176 +++ thirdparty/eigen/blas/testing/dblat3.dat | 20 + thirdparty/eigen/blas/testing/dblat3.f | 2873 +++ thirdparty/eigen/blas/testing/runblastest.sh | 45 + thirdparty/eigen/blas/testing/sblat1.f | 1021 + thirdparty/eigen/blas/testing/sblat2.dat | 34 + thirdparty/eigen/blas/testing/sblat2.f | 3176 +++ 
thirdparty/eigen/blas/testing/sblat3.dat | 20 + thirdparty/eigen/blas/testing/sblat3.f | 2873 +++ thirdparty/eigen/blas/testing/zblat1.f | 724 + thirdparty/eigen/blas/testing/zblat2.dat | 35 + thirdparty/eigen/blas/testing/zblat2.f | 3287 ++++ thirdparty/eigen/blas/testing/zblat3.dat | 23 + thirdparty/eigen/blas/testing/zblat3.f | 3502 ++++ thirdparty/eigen/blas/xerbla.cpp | 23 + thirdparty/eigen/cmake/Eigen3Config.cmake.in | 28 + .../eigen/cmake/EigenConfigureTesting.cmake | 61 + .../eigen/cmake/EigenDetermineOSVersion.cmake | 46 + .../cmake/EigenDetermineVSServicePack.cmake | 41 + thirdparty/eigen/cmake/EigenTesting.cmake | 723 + thirdparty/eigen/cmake/EigenUninstall.cmake | 40 + thirdparty/eigen/cmake/FindAdolc.cmake | 20 + thirdparty/eigen/cmake/FindBLAS.cmake | 419 + thirdparty/eigen/cmake/FindCholmod.cmake | 89 + thirdparty/eigen/cmake/FindComputeCpp.cmake | 245 + thirdparty/eigen/cmake/FindEigen2.cmake | 80 + thirdparty/eigen/cmake/FindEigen3.cmake | 97 + thirdparty/eigen/cmake/FindFFTW.cmake | 119 + thirdparty/eigen/cmake/FindGLEW.cmake | 105 + thirdparty/eigen/cmake/FindGMP.cmake | 21 + thirdparty/eigen/cmake/FindGSL.cmake | 170 + thirdparty/eigen/cmake/FindGoogleHash.cmake | 23 + thirdparty/eigen/cmake/FindLAPACK.cmake | 273 + thirdparty/eigen/cmake/FindMPFR.cmake | 83 + thirdparty/eigen/cmake/FindMetis.cmake | 59 + thirdparty/eigen/cmake/FindPastix.cmake | 25 + thirdparty/eigen/cmake/FindSPQR.cmake | 41 + thirdparty/eigen/cmake/FindScotch.cmake | 24 + .../eigen/cmake/FindStandardMathLibrary.cmake | 64 + thirdparty/eigen/cmake/FindSuperLU.cmake | 97 + thirdparty/eigen/cmake/FindUmfpack.cmake | 53 + thirdparty/eigen/cmake/RegexUtils.cmake | 19 + thirdparty/eigen/cmake/UseEigen3.cmake | 6 + thirdparty/eigen/cmake/language_support.cmake | 67 + thirdparty/eigen/debug/gdb/__init__.py | 1 + thirdparty/eigen/debug/gdb/printers.py | 214 + thirdparty/eigen/debug/msvc/eigen.natvis | 235 + .../eigen/debug/msvc/eigen_autoexp_part.dat | 295 + 
thirdparty/eigen/demos/CMakeLists.txt | 13 + .../eigen/demos/mandelbrot/CMakeLists.txt | 21 + thirdparty/eigen/demos/mandelbrot/README | 10 + .../eigen/demos/mandelbrot/mandelbrot.cpp | 213 + .../eigen/demos/mandelbrot/mandelbrot.h | 71 + thirdparty/eigen/demos/mix_eigen_and_c/README | 9 + .../demos/mix_eigen_and_c/binary_library.cpp | 185 + .../demos/mix_eigen_and_c/binary_library.h | 71 + .../eigen/demos/mix_eigen_and_c/example.c | 65 + thirdparty/eigen/demos/opengl/CMakeLists.txt | 28 + thirdparty/eigen/demos/opengl/README | 13 + thirdparty/eigen/demos/opengl/camera.cpp | 264 + thirdparty/eigen/demos/opengl/camera.h | 118 + thirdparty/eigen/demos/opengl/gpuhelper.cpp | 126 + thirdparty/eigen/demos/opengl/gpuhelper.h | 207 + thirdparty/eigen/demos/opengl/icosphere.cpp | 120 + thirdparty/eigen/demos/opengl/icosphere.h | 30 + .../eigen/demos/opengl/quaternion_demo.cpp | 656 + .../eigen/demos/opengl/quaternion_demo.h | 114 + thirdparty/eigen/demos/opengl/trackball.cpp | 59 + thirdparty/eigen/demos/opengl/trackball.h | 42 + thirdparty/eigen/doc/A05_PortingFrom2To3.dox | 299 + thirdparty/eigen/doc/AsciiQuickReference.txt | 215 + thirdparty/eigen/doc/B01_Experimental.dox | 52 + thirdparty/eigen/doc/CMakeLists.txt | 112 + thirdparty/eigen/doc/ClassHierarchy.dox | 129 + .../eigen/doc/CoeffwiseMathFunctionsTable.dox | 525 + .../doc/CustomizingEigen_CustomScalar.dox | 120 + .../doc/CustomizingEigen_InheritingMatrix.dox | 34 + .../doc/CustomizingEigen_NullaryExpr.dox | 86 + .../eigen/doc/CustomizingEigen_Plugins.dox | 69 + .../eigen/doc/DenseDecompositionBenchmark.dox | 42 + thirdparty/eigen/doc/Doxyfile.in | 1891 ++ .../eigen/doc/Eigen_Silly_Professor_64x64.png | Bin 0 -> 8355 bytes .../eigen/doc/FixedSizeVectorizable.dox | 38 + .../eigen/doc/FunctionsTakingEigenTypes.dox | 217 + thirdparty/eigen/doc/HiPerformance.dox | 128 + thirdparty/eigen/doc/InplaceDecomposition.dox | 115 + thirdparty/eigen/doc/InsideEigenExample.dox | 495 + thirdparty/eigen/doc/LeastSquares.dox | 70 
+ thirdparty/eigen/doc/Manual.dox | 188 + .../eigen/doc/MatrixfreeSolverExample.dox | 20 + thirdparty/eigen/doc/NewExpressionType.dox | 143 + thirdparty/eigen/doc/Overview.dox | 30 + thirdparty/eigen/doc/PassingByValue.dox | 40 + thirdparty/eigen/doc/Pitfalls.dox | 38 + .../eigen/doc/PreprocessorDirectives.dox | 166 + thirdparty/eigen/doc/QuickReference.dox | 785 + thirdparty/eigen/doc/QuickStartGuide.dox | 100 + thirdparty/eigen/doc/SparseLinearSystems.dox | 229 + thirdparty/eigen/doc/SparseQuickReference.dox | 272 + thirdparty/eigen/doc/StlContainers.dox | 62 + thirdparty/eigen/doc/StorageOrders.dox | 86 + .../eigen/doc/StructHavingEigenMembers.dox | 190 + thirdparty/eigen/doc/TemplateKeyword.dox | 133 + thirdparty/eigen/doc/TopicAliasing.dox | 237 + thirdparty/eigen/doc/TopicAssertions.dox | 108 + .../doc/TopicEigenExpressionTemplates.dox | 12 + thirdparty/eigen/doc/TopicLazyEvaluation.dox | 65 + .../doc/TopicLinearAlgebraDecompositions.dox | 263 + thirdparty/eigen/doc/TopicMultithreading.dox | 54 + thirdparty/eigen/doc/TopicResizing.dox | 11 + thirdparty/eigen/doc/TopicScalarTypes.dox | 12 + thirdparty/eigen/doc/TopicVectorization.dox | 9 + .../doc/TutorialAdvancedInitialization.dox | 162 + thirdparty/eigen/doc/TutorialArrayClass.dox | 192 + .../eigen/doc/TutorialBlockOperations.dox | 228 + thirdparty/eigen/doc/TutorialGeometry.dox | 242 + .../eigen/doc/TutorialLinearAlgebra.dox | 272 + thirdparty/eigen/doc/TutorialMapClass.dox | 86 + .../eigen/doc/TutorialMatrixArithmetic.dox | 214 + thirdparty/eigen/doc/TutorialMatrixClass.dox | 265 + ...TutorialReductionsVisitorsBroadcasting.dox | 266 + .../eigen/doc/TutorialReshapeSlicing.dox | 65 + thirdparty/eigen/doc/TutorialSparse.dox | 365 + .../doc/TutorialSparse_example_details.dox | 4 + thirdparty/eigen/doc/UnalignedArrayAssert.dox | 120 + .../eigen/doc/UsingBlasLapackBackends.dox | 133 + thirdparty/eigen/doc/UsingIntelMKL.dox | 107 + thirdparty/eigen/doc/UsingNVCC.dox | 32 + 
thirdparty/eigen/doc/WrongStackAlignment.dox | 56 + thirdparty/eigen/doc/eigen_navtree_hacks.js | 240 + thirdparty/eigen/doc/eigendoxy.css | 216 + thirdparty/eigen/doc/eigendoxy_footer.html.in | 36 + thirdparty/eigen/doc/eigendoxy_header.html.in | 61 + thirdparty/eigen/doc/eigendoxy_layout.xml.in | 178 + thirdparty/eigen/doc/eigendoxy_tabs.css | 59 + thirdparty/eigen/doc/examples/.krazy | 2 + thirdparty/eigen/doc/examples/CMakeLists.txt | 21 + .../examples/CustomizingEigen_Inheritance.cpp | 30 + thirdparty/eigen/doc/examples/Cwise_erf.cpp | 9 + thirdparty/eigen/doc/examples/Cwise_erfc.cpp | 9 + .../eigen/doc/examples/Cwise_lgamma.cpp | 9 + .../doc/examples/DenseBase_middleCols_int.cpp | 15 + .../doc/examples/DenseBase_middleRows_int.cpp | 15 + .../DenseBase_template_int_middleCols.cpp | 15 + .../DenseBase_template_int_middleRows.cpp | 15 + .../eigen/doc/examples/QuickStart_example.cpp | 14 + .../examples/QuickStart_example2_dynamic.cpp | 15 + .../examples/QuickStart_example2_fixed.cpp | 15 + .../doc/examples/TemplateKeyword_flexible.cpp | 22 + .../doc/examples/TemplateKeyword_simple.cpp | 20 + .../eigen/doc/examples/TutorialInplaceLU.cpp | 61 + .../examples/TutorialLinAlgComputeTwice.cpp | 23 + .../TutorialLinAlgExComputeSolveError.cpp | 14 + ...torialLinAlgExSolveColPivHouseholderQR.cpp | 17 + .../examples/TutorialLinAlgExSolveLDLT.cpp | 16 + .../TutorialLinAlgInverseDeterminant.cpp | 16 + .../examples/TutorialLinAlgRankRevealing.cpp | 20 + .../doc/examples/TutorialLinAlgSVDSolve.cpp | 15 + .../TutorialLinAlgSelfAdjointEigenSolver.cpp | 18 + .../examples/TutorialLinAlgSetThreshold.cpp | 16 + .../Tutorial_ArrayClass_accessors.cpp | 24 + .../examples/Tutorial_ArrayClass_addition.cpp | 23 + .../Tutorial_ArrayClass_cwise_other.cpp | 19 + .../examples/Tutorial_ArrayClass_interop.cpp | 22 + .../Tutorial_ArrayClass_interop_matrix.cpp | 26 + .../doc/examples/Tutorial_ArrayClass_mult.cpp | 16 + ...orial_BlockOperations_block_assignment.cpp | 18 + 
.../Tutorial_BlockOperations_colrow.cpp | 17 + .../Tutorial_BlockOperations_corner.cpp | 17 + .../Tutorial_BlockOperations_print_block.cpp | 20 + .../Tutorial_BlockOperations_vector.cpp | 14 + .../doc/examples/Tutorial_PartialLU_solve.cpp | 18 + ...ionsVisitorsBroadcasting_broadcast_1nn.cpp | 24 + ...sVisitorsBroadcasting_broadcast_simple.cpp | 21 + ...sBroadcasting_broadcast_simple_rowwise.cpp | 20 + ...ReductionsVisitorsBroadcasting_colwise.cpp | 13 + ...ReductionsVisitorsBroadcasting_maxnorm.cpp | 20 + ...nsVisitorsBroadcasting_reductions_bool.cpp | 21 + ...nsVisitorsBroadcasting_reductions_norm.cpp | 28 + ...rsBroadcasting_reductions_operatornorm.cpp | 18 + ...ReductionsVisitorsBroadcasting_rowwise.cpp | 13 + ...eductionsVisitorsBroadcasting_visitors.cpp | 26 + .../Tutorial_simple_example_dynamic_size.cpp | 22 + .../Tutorial_simple_example_fixed_size.cpp | 15 + thirdparty/eigen/doc/examples/class_Block.cpp | 27 + .../doc/examples/class_CwiseBinaryOp.cpp | 18 + .../eigen/doc/examples/class_CwiseUnaryOp.cpp | 19 + .../examples/class_CwiseUnaryOp_ptrfun.cpp | 20 + .../eigen/doc/examples/class_FixedBlock.cpp | 27 + .../doc/examples/class_FixedVectorBlock.cpp | 27 + .../eigen/doc/examples/class_VectorBlock.cpp | 27 + .../examples/function_taking_eigenbase.cpp | 18 + .../doc/examples/function_taking_ref.cpp | 19 + .../eigen/doc/examples/make_circulant.cpp | 11 + .../doc/examples/make_circulant.cpp.entry | 5 + .../doc/examples/make_circulant.cpp.evaluator | 32 + .../examples/make_circulant.cpp.expression | 20 + .../doc/examples/make_circulant.cpp.main | 8 + .../doc/examples/make_circulant.cpp.preamble | 4 + .../doc/examples/make_circulant.cpp.traits | 19 + .../eigen/doc/examples/make_circulant2.cpp | 52 + .../eigen/doc/examples/matrixfree_cg.cpp | 128 + .../eigen/doc/examples/nullary_indexing.cpp | 66 + .../doc/examples/tut_arithmetic_add_sub.cpp | 22 + .../doc/examples/tut_arithmetic_dot_cross.cpp | 15 + .../examples/tut_arithmetic_matrix_mul.cpp | 19 + 
.../examples/tut_arithmetic_redux_basic.cpp | 16 + .../tut_arithmetic_scalar_mul_div.cpp | 17 + .../tut_matrix_coefficient_accessors.cpp | 18 + .../eigen/doc/examples/tut_matrix_resize.cpp | 18 + .../examples/tut_matrix_resize_fixed_size.cpp | 12 + thirdparty/eigen/doc/ftv2node.png | Bin 0 -> 86 bytes thirdparty/eigen/doc/ftv2pnode.png | Bin 0 -> 229 bytes thirdparty/eigen/doc/snippets/.krazy | 2 + .../doc/snippets/AngleAxis_mimic_euler.cpp | 5 + .../eigen/doc/snippets/BiCGSTAB_simple.cpp | 11 + .../doc/snippets/BiCGSTAB_step_by_step.cpp | 14 + thirdparty/eigen/doc/snippets/CMakeLists.txt | 26 + .../snippets/ColPivHouseholderQR_solve.cpp | 8 + .../snippets/ComplexEigenSolver_compute.cpp | 16 + .../ComplexEigenSolver_eigenvalues.cpp | 4 + .../ComplexEigenSolver_eigenvectors.cpp | 4 + .../doc/snippets/ComplexSchur_compute.cpp | 6 + .../doc/snippets/ComplexSchur_matrixT.cpp | 4 + .../doc/snippets/ComplexSchur_matrixU.cpp | 4 + thirdparty/eigen/doc/snippets/Cwise_abs.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_abs2.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_acos.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_arg.cpp | 3 + .../doc/snippets/Cwise_array_power_array.cpp | 4 + thirdparty/eigen/doc/snippets/Cwise_asin.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_atan.cpp | 2 + .../eigen/doc/snippets/Cwise_boolean_and.cpp | 2 + .../eigen/doc/snippets/Cwise_boolean_not.cpp | 5 + .../eigen/doc/snippets/Cwise_boolean_or.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_ceil.cpp | 3 + thirdparty/eigen/doc/snippets/Cwise_cos.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_cosh.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_cube.cpp | 2 + .../eigen/doc/snippets/Cwise_equal_equal.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_exp.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_floor.cpp | 3 + .../eigen/doc/snippets/Cwise_greater.cpp | 2 + .../doc/snippets/Cwise_greater_equal.cpp | 2 + .../eigen/doc/snippets/Cwise_inverse.cpp | 2 + .../eigen/doc/snippets/Cwise_isFinite.cpp | 5 + 
thirdparty/eigen/doc/snippets/Cwise_isInf.cpp | 5 + thirdparty/eigen/doc/snippets/Cwise_isNaN.cpp | 5 + thirdparty/eigen/doc/snippets/Cwise_less.cpp | 2 + .../eigen/doc/snippets/Cwise_less_equal.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_log.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_log10.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_max.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_min.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_minus.cpp | 2 + .../eigen/doc/snippets/Cwise_minus_equal.cpp | 3 + .../eigen/doc/snippets/Cwise_not_equal.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_plus.cpp | 2 + .../eigen/doc/snippets/Cwise_plus_equal.cpp | 3 + thirdparty/eigen/doc/snippets/Cwise_pow.cpp | 2 + .../eigen/doc/snippets/Cwise_product.cpp | 4 + .../eigen/doc/snippets/Cwise_quotient.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_round.cpp | 3 + .../doc/snippets/Cwise_scalar_power_array.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_sign.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_sin.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_sinh.cpp | 2 + .../eigen/doc/snippets/Cwise_slash_equal.cpp | 3 + thirdparty/eigen/doc/snippets/Cwise_sqrt.cpp | 2 + .../eigen/doc/snippets/Cwise_square.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_tan.cpp | 2 + thirdparty/eigen/doc/snippets/Cwise_tanh.cpp | 2 + .../eigen/doc/snippets/Cwise_times_equal.cpp | 3 + .../doc/snippets/DenseBase_LinSpaced.cpp | 2 + .../doc/snippets/DenseBase_LinSpacedInt.cpp | 8 + .../doc/snippets/DenseBase_LinSpaced_seq.cpp | 2 + .../doc/snippets/DenseBase_setLinSpaced.cpp | 3 + .../snippets/DirectionWise_hnormalized.cpp | 7 + .../doc/snippets/DirectionWise_replicate.cpp | 4 + .../snippets/DirectionWise_replicate_int.cpp | 4 + .../EigenSolver_EigenSolver_MatrixType.cpp | 16 + .../doc/snippets/EigenSolver_compute.cpp | 6 + .../doc/snippets/EigenSolver_eigenvalues.cpp | 4 + .../doc/snippets/EigenSolver_eigenvectors.cpp | 4 + .../EigenSolver_pseudoEigenvectors.cpp | 9 + .../snippets/FullPivHouseholderQR_solve.cpp 
| 8 + .../eigen/doc/snippets/FullPivLU_image.cpp | 9 + .../eigen/doc/snippets/FullPivLU_kernel.cpp | 7 + .../eigen/doc/snippets/FullPivLU_solve.cpp | 11 + .../doc/snippets/GeneralizedEigenSolver.cpp | 7 + .../HessenbergDecomposition_compute.cpp | 6 + .../HessenbergDecomposition_matrixH.cpp | 8 + .../HessenbergDecomposition_packedMatrix.cpp | 9 + .../snippets/HouseholderQR_householderQ.cpp | 7 + .../doc/snippets/HouseholderQR_solve.cpp | 9 + ...ouseholderSequence_HouseholderSequence.cpp | 31 + thirdparty/eigen/doc/snippets/IOFormat.cpp | 14 + .../eigen/doc/snippets/JacobiSVD_basic.cpp | 9 + .../eigen/doc/snippets/Jacobi_makeGivens.cpp | 6 + .../eigen/doc/snippets/Jacobi_makeJacobi.cpp | 8 + thirdparty/eigen/doc/snippets/LLT_example.cpp | 12 + thirdparty/eigen/doc/snippets/LLT_solve.cpp | 8 + .../snippets/LeastSquaresNormalEquations.cpp | 4 + .../eigen/doc/snippets/LeastSquaresQR.cpp | 4 + .../eigen/doc/snippets/Map_general_stride.cpp | 5 + .../eigen/doc/snippets/Map_inner_stride.cpp | 5 + .../eigen/doc/snippets/Map_outer_stride.cpp | 3 + .../eigen/doc/snippets/Map_placement_new.cpp | 5 + thirdparty/eigen/doc/snippets/Map_simple.cpp | 3 + .../eigen/doc/snippets/MatrixBase_adjoint.cpp | 3 + .../eigen/doc/snippets/MatrixBase_all.cpp | 7 + .../snippets/MatrixBase_applyOnTheLeft.cpp | 7 + .../snippets/MatrixBase_applyOnTheRight.cpp | 9 + .../eigen/doc/snippets/MatrixBase_array.cpp | 4 + .../doc/snippets/MatrixBase_array_const.cpp | 4 + .../doc/snippets/MatrixBase_asDiagonal.cpp | 1 + .../doc/snippets/MatrixBase_block_int_int.cpp | 5 + .../MatrixBase_block_int_int_int_int.cpp | 5 + .../MatrixBase_bottomLeftCorner_int_int.cpp | 6 + .../MatrixBase_bottomRightCorner_int_int.cpp | 6 + .../snippets/MatrixBase_bottomRows_int.cpp | 6 + .../eigen/doc/snippets/MatrixBase_cast.cpp | 3 + .../eigen/doc/snippets/MatrixBase_col.cpp | 3 + .../eigen/doc/snippets/MatrixBase_colwise.cpp | 5 + ...trixBase_computeInverseAndDetWithCheck.cpp | 13 + .../MatrixBase_computeInverseWithCheck.cpp | 
11 + .../doc/snippets/MatrixBase_cwiseAbs.cpp | 4 + .../doc/snippets/MatrixBase_cwiseAbs2.cpp | 4 + .../doc/snippets/MatrixBase_cwiseEqual.cpp | 7 + .../doc/snippets/MatrixBase_cwiseInverse.cpp | 4 + .../doc/snippets/MatrixBase_cwiseMax.cpp | 2 + .../doc/snippets/MatrixBase_cwiseMin.cpp | 2 + .../doc/snippets/MatrixBase_cwiseNotEqual.cpp | 7 + .../doc/snippets/MatrixBase_cwiseProduct.cpp | 4 + .../doc/snippets/MatrixBase_cwiseQuotient.cpp | 2 + .../doc/snippets/MatrixBase_cwiseSign.cpp | 4 + .../doc/snippets/MatrixBase_cwiseSqrt.cpp | 2 + .../doc/snippets/MatrixBase_diagonal.cpp | 4 + .../doc/snippets/MatrixBase_diagonal_int.cpp | 5 + .../MatrixBase_diagonal_template_int.cpp | 5 + .../doc/snippets/MatrixBase_eigenvalues.cpp | 3 + .../eigen/doc/snippets/MatrixBase_end_int.cpp | 5 + .../eigen/doc/snippets/MatrixBase_eval.cpp | 12 + .../MatrixBase_fixedBlock_int_int.cpp | 5 + .../doc/snippets/MatrixBase_hnormalized.cpp | 6 + .../doc/snippets/MatrixBase_homogeneous.cpp | 6 + .../doc/snippets/MatrixBase_identity.cpp | 1 + .../snippets/MatrixBase_identity_int_int.cpp | 1 + .../eigen/doc/snippets/MatrixBase_inverse.cpp | 3 + .../doc/snippets/MatrixBase_isDiagonal.cpp | 6 + .../doc/snippets/MatrixBase_isIdentity.cpp | 5 + .../eigen/doc/snippets/MatrixBase_isOnes.cpp | 5 + .../doc/snippets/MatrixBase_isOrthogonal.cpp | 6 + .../doc/snippets/MatrixBase_isUnitary.cpp | 5 + .../eigen/doc/snippets/MatrixBase_isZero.cpp | 5 + .../doc/snippets/MatrixBase_leftCols_int.cpp | 6 + .../eigen/doc/snippets/MatrixBase_noalias.cpp | 3 + .../eigen/doc/snippets/MatrixBase_ones.cpp | 2 + .../doc/snippets/MatrixBase_ones_int.cpp | 2 + .../doc/snippets/MatrixBase_ones_int_int.cpp | 1 + .../doc/snippets/MatrixBase_operatorNorm.cpp | 3 + .../eigen/doc/snippets/MatrixBase_prod.cpp | 3 + .../eigen/doc/snippets/MatrixBase_random.cpp | 1 + .../doc/snippets/MatrixBase_random_int.cpp | 1 + .../snippets/MatrixBase_random_int_int.cpp | 1 + .../doc/snippets/MatrixBase_replicate.cpp | 4 + 
.../snippets/MatrixBase_replicate_int_int.cpp | 4 + .../eigen/doc/snippets/MatrixBase_reverse.cpp | 8 + .../doc/snippets/MatrixBase_rightCols_int.cpp | 6 + .../eigen/doc/snippets/MatrixBase_row.cpp | 3 + .../eigen/doc/snippets/MatrixBase_rowwise.cpp | 5 + .../snippets/MatrixBase_segment_int_int.cpp | 5 + .../eigen/doc/snippets/MatrixBase_select.cpp | 6 + .../eigen/doc/snippets/MatrixBase_set.cpp | 13 + .../doc/snippets/MatrixBase_setIdentity.cpp | 3 + .../eigen/doc/snippets/MatrixBase_setOnes.cpp | 3 + .../doc/snippets/MatrixBase_setRandom.cpp | 3 + .../eigen/doc/snippets/MatrixBase_setZero.cpp | 3 + .../doc/snippets/MatrixBase_start_int.cpp | 5 + .../MatrixBase_template_int_bottomRows.cpp | 6 + .../snippets/MatrixBase_template_int_end.cpp | 5 + ...template_int_int_block_int_int_int_int.cpp | 5 + ...Base_template_int_int_bottomLeftCorner.cpp | 6 + ...plate_int_int_bottomLeftCorner_int_int.cpp | 6 + ...ase_template_int_int_bottomRightCorner.cpp | 6 + ...late_int_int_bottomRightCorner_int_int.cpp | 6 + ...rixBase_template_int_int_topLeftCorner.cpp | 6 + ...template_int_int_topLeftCorner_int_int.cpp | 6 + ...ixBase_template_int_int_topRightCorner.cpp | 6 + ...emplate_int_int_topRightCorner_int_int.cpp | 6 + .../MatrixBase_template_int_leftCols.cpp | 6 + .../MatrixBase_template_int_rightCols.cpp | 6 + .../MatrixBase_template_int_segment.cpp | 5 + .../MatrixBase_template_int_start.cpp | 5 + .../MatrixBase_template_int_topRows.cpp | 6 + .../MatrixBase_topLeftCorner_int_int.cpp | 6 + .../MatrixBase_topRightCorner_int_int.cpp | 6 + .../doc/snippets/MatrixBase_topRows_int.cpp | 6 + .../doc/snippets/MatrixBase_transpose.cpp | 8 + .../snippets/MatrixBase_triangularView.cpp | 9 + .../eigen/doc/snippets/MatrixBase_zero.cpp | 2 + .../doc/snippets/MatrixBase_zero_int.cpp | 2 + .../doc/snippets/MatrixBase_zero_int_int.cpp | 1 + .../snippets/Matrix_resize_NoChange_int.cpp | 3 + .../eigen/doc/snippets/Matrix_resize_int.cpp | 6 + .../snippets/Matrix_resize_int_NoChange.cpp | 3 + 
.../doc/snippets/Matrix_resize_int_int.cpp | 9 + .../doc/snippets/Matrix_setConstant_int.cpp | 3 + .../snippets/Matrix_setConstant_int_int.cpp | 3 + .../snippets/Matrix_setIdentity_int_int.cpp | 3 + .../eigen/doc/snippets/Matrix_setOnes_int.cpp | 3 + .../doc/snippets/Matrix_setOnes_int_int.cpp | 3 + .../doc/snippets/Matrix_setRandom_int.cpp | 3 + .../doc/snippets/Matrix_setRandom_int_int.cpp | 3 + .../eigen/doc/snippets/Matrix_setZero_int.cpp | 3 + .../doc/snippets/Matrix_setZero_int_int.cpp | 3 + .../eigen/doc/snippets/PartialPivLU_solve.cpp | 7 + .../eigen/doc/snippets/PartialRedux_count.cpp | 5 + .../doc/snippets/PartialRedux_maxCoeff.cpp | 3 + .../doc/snippets/PartialRedux_minCoeff.cpp | 3 + .../eigen/doc/snippets/PartialRedux_norm.cpp | 3 + .../eigen/doc/snippets/PartialRedux_prod.cpp | 3 + .../doc/snippets/PartialRedux_squaredNorm.cpp | 3 + .../eigen/doc/snippets/PartialRedux_sum.cpp | 3 + .../eigen/doc/snippets/RealQZ_compute.cpp | 17 + .../RealSchur_RealSchur_MatrixType.cpp | 10 + .../eigen/doc/snippets/RealSchur_compute.cpp | 6 + ...ointEigenSolver_SelfAdjointEigenSolver.cpp | 7 + ...lver_SelfAdjointEigenSolver_MatrixType.cpp | 17 + ...ver_SelfAdjointEigenSolver_MatrixType2.cpp | 16 + ...fAdjointEigenSolver_compute_MatrixType.cpp | 7 + ...AdjointEigenSolver_compute_MatrixType2.cpp | 9 + .../SelfAdjointEigenSolver_eigenvalues.cpp | 4 + .../SelfAdjointEigenSolver_eigenvectors.cpp | 4 + ...AdjointEigenSolver_operatorInverseSqrt.cpp | 9 + .../SelfAdjointEigenSolver_operatorSqrt.cpp | 8 + .../snippets/SelfAdjointView_eigenvalues.cpp | 3 + .../snippets/SelfAdjointView_operatorNorm.cpp | 3 + .../doc/snippets/SparseMatrix_coeffs.cpp | 9 + .../doc/snippets/TopicAliasing_block.cpp | 7 + .../snippets/TopicAliasing_block_correct.cpp | 7 + .../doc/snippets/TopicAliasing_cwise.cpp | 20 + .../doc/snippets/TopicAliasing_mult1.cpp | 4 + .../doc/snippets/TopicAliasing_mult2.cpp | 10 + .../doc/snippets/TopicAliasing_mult3.cpp | 4 + .../doc/snippets/TopicAliasing_mult4.cpp | 
5 + .../doc/snippets/TopicAliasing_mult5.cpp | 5 + .../snippets/TopicStorageOrders_example.cpp | 18 + .../eigen/doc/snippets/Triangular_solve.cpp | 11 + ...lization_Tridiagonalization_MatrixType.cpp | 9 + .../snippets/Tridiagonalization_compute.cpp | 9 + .../Tridiagonalization_decomposeInPlace.cpp | 10 + .../snippets/Tridiagonalization_diagonal.cpp | 13 + ...iagonalization_householderCoefficients.cpp | 6 + .../Tridiagonalization_packedMatrix.cpp | 8 + .../Tutorial_AdvancedInitialization_Block.cpp | 5 + ..._AdvancedInitialization_CommaTemporary.cpp | 4 + .../Tutorial_AdvancedInitialization_Join.cpp | 11 + ...orial_AdvancedInitialization_LinSpaced.cpp | 7 + ...orial_AdvancedInitialization_ThreeWays.cpp | 20 + .../Tutorial_AdvancedInitialization_Zero.cpp | 13 + .../doc/snippets/Tutorial_Map_rowmajor.cpp | 7 + .../eigen/doc/snippets/Tutorial_Map_using.cpp | 21 + .../doc/snippets/Tutorial_ReshapeMat2Mat.cpp | 6 + .../doc/snippets/Tutorial_ReshapeMat2Vec.cpp | 11 + .../doc/snippets/Tutorial_SlicingCol.cpp | 11 + .../doc/snippets/Tutorial_SlicingVec.cpp | 4 + .../doc/snippets/Tutorial_commainit_01.cpp | 5 + .../doc/snippets/Tutorial_commainit_01b.cpp | 5 + .../doc/snippets/Tutorial_commainit_02.cpp | 7 + .../Tutorial_solve_matrix_inverse.cpp | 6 + .../snippets/Tutorial_solve_multiple_rhs.cpp | 10 + .../Tutorial_solve_reuse_decomposition.cpp | 13 + .../doc/snippets/Tutorial_solve_singular.cpp | 9 + .../snippets/Tutorial_solve_triangular.cpp | 8 + .../Tutorial_solve_triangular_inplace.cpp | 6 + .../doc/snippets/VectorwiseOp_homogeneous.cpp | 7 + .../eigen/doc/snippets/Vectorwise_reverse.cpp | 10 + .../eigen/doc/snippets/class_FullPivLU.cpp | 16 + .../eigen/doc/snippets/compile_snippet.cpp.in | 20 + .../snippets/tut_arithmetic_redux_minmax.cpp | 12 + .../tut_arithmetic_transpose_aliasing.cpp | 5 + .../tut_arithmetic_transpose_conjugate.cpp | 12 + .../tut_arithmetic_transpose_inplace.cpp | 6 + .../tut_matrix_assignment_resizing.cpp | 5 + 
.../eigen/doc/special_examples/CMakeLists.txt | 35 + .../Tutorial_sparse_example.cpp | 34 + .../Tutorial_sparse_example_details.cpp | 44 + .../doc/special_examples/random_cpp11.cpp | 14 + thirdparty/eigen/doc/tutorial.cpp | 62 + thirdparty/eigen/eigen3.pc.in | 9 + thirdparty/eigen/failtest/CMakeLists.txt | 75 + thirdparty/eigen/failtest/bdcsvd_int.cpp | 14 + .../block_nonconst_ctor_on_const_xpr_0.cpp | 15 + .../block_nonconst_ctor_on_const_xpr_1.cpp | 15 + .../block_nonconst_ctor_on_const_xpr_2.cpp | 16 + .../block_on_const_type_actually_const_0.cpp | 16 + .../block_on_const_type_actually_const_1.cpp | 16 + thirdparty/eigen/failtest/colpivqr_int.cpp | 14 + .../const_qualified_block_method_retval_0.cpp | 15 + .../const_qualified_block_method_retval_1.cpp | 15 + ...const_qualified_diagonal_method_retval.cpp | 15 + ...onst_qualified_transpose_method_retval.cpp | 15 + ...seunaryview_nonconst_ctor_on_const_xpr.cpp | 15 + ...unaryview_on_const_type_actually_const.cpp | 16 + .../diagonal_nonconst_ctor_on_const_xpr.cpp | 15 + .../diagonal_on_const_type_actually_const.cpp | 16 + .../eigen/failtest/eigensolver_cplx.cpp | 14 + thirdparty/eigen/failtest/eigensolver_int.cpp | 14 + .../eigen/failtest/failtest_sanity_check.cpp | 5 + thirdparty/eigen/failtest/fullpivlu_int.cpp | 14 + thirdparty/eigen/failtest/fullpivqr_int.cpp | 14 + thirdparty/eigen/failtest/jacobisvd_int.cpp | 14 + thirdparty/eigen/failtest/ldlt_int.cpp | 14 + thirdparty/eigen/failtest/llt_int.cpp | 14 + .../map_nonconst_ctor_on_const_ptr_0.cpp | 15 + .../map_nonconst_ctor_on_const_ptr_1.cpp | 15 + .../map_nonconst_ctor_on_const_ptr_2.cpp | 15 + .../map_nonconst_ctor_on_const_ptr_3.cpp | 15 + .../map_nonconst_ctor_on_const_ptr_4.cpp | 15 + .../map_on_const_type_actually_const_0.cpp | 15 + .../map_on_const_type_actually_const_1.cpp | 15 + .../eigen/failtest/partialpivlu_int.cpp | 14 + thirdparty/eigen/failtest/qr_int.cpp | 14 + thirdparty/eigen/failtest/ref_1.cpp | 18 + thirdparty/eigen/failtest/ref_2.cpp | 15 + 
thirdparty/eigen/failtest/ref_3.cpp | 15 + thirdparty/eigen/failtest/ref_4.cpp | 15 + thirdparty/eigen/failtest/ref_5.cpp | 16 + ...adjointview_nonconst_ctor_on_const_xpr.cpp | 15 + ...jointview_on_const_type_actually_const.cpp | 16 + thirdparty/eigen/failtest/sparse_ref_1.cpp | 18 + thirdparty/eigen/failtest/sparse_ref_2.cpp | 15 + thirdparty/eigen/failtest/sparse_ref_3.cpp | 15 + thirdparty/eigen/failtest/sparse_ref_4.cpp | 15 + thirdparty/eigen/failtest/sparse_ref_5.cpp | 16 + .../failtest/sparse_storage_mismatch.cpp | 16 + thirdparty/eigen/failtest/swap_1.cpp | 14 + thirdparty/eigen/failtest/swap_2.cpp | 14 + thirdparty/eigen/failtest/ternary_1.cpp | 13 + thirdparty/eigen/failtest/ternary_2.cpp | 13 + .../transpose_nonconst_ctor_on_const_xpr.cpp | 15 + ...transpose_on_const_type_actually_const.cpp | 16 + ...angularview_nonconst_ctor_on_const_xpr.cpp | 15 + ...gularview_on_const_type_actually_const.cpp | 16 + thirdparty/eigen/lapack/CMakeLists.txt | 449 + thirdparty/eigen/lapack/cholesky.cpp | 72 + thirdparty/eigen/lapack/clacgv.f | 116 + thirdparty/eigen/lapack/cladiv.f | 97 + thirdparty/eigen/lapack/clarf.f | 232 + thirdparty/eigen/lapack/clarfb.f | 771 + thirdparty/eigen/lapack/clarfg.f | 203 + thirdparty/eigen/lapack/clarft.f | 328 + thirdparty/eigen/lapack/complex_double.cpp | 18 + thirdparty/eigen/lapack/complex_single.cpp | 18 + thirdparty/eigen/lapack/dladiv.f | 128 + thirdparty/eigen/lapack/dlamch.f | 189 + thirdparty/eigen/lapack/dlapy2.f | 104 + thirdparty/eigen/lapack/dlapy3.f | 111 + thirdparty/eigen/lapack/dlarf.f | 227 + thirdparty/eigen/lapack/dlarfb.f | 762 + thirdparty/eigen/lapack/dlarfg.f | 196 + thirdparty/eigen/lapack/dlarft.f | 326 + thirdparty/eigen/lapack/double.cpp | 18 + thirdparty/eigen/lapack/dsecnd_NONE.f | 52 + thirdparty/eigen/lapack/eigenvalues.cpp | 62 + thirdparty/eigen/lapack/ilaclc.f | 118 + thirdparty/eigen/lapack/ilaclr.f | 121 + thirdparty/eigen/lapack/iladlc.f | 118 + thirdparty/eigen/lapack/iladlr.f | 121 + 
thirdparty/eigen/lapack/ilaslc.f | 118 + thirdparty/eigen/lapack/ilaslr.f | 121 + thirdparty/eigen/lapack/ilazlc.f | 118 + thirdparty/eigen/lapack/ilazlr.f | 121 + thirdparty/eigen/lapack/lapack_common.h | 29 + thirdparty/eigen/lapack/lu.cpp | 89 + thirdparty/eigen/lapack/second_NONE.f | 52 + thirdparty/eigen/lapack/single.cpp | 18 + thirdparty/eigen/lapack/sladiv.f | 128 + thirdparty/eigen/lapack/slamch.f | 192 + thirdparty/eigen/lapack/slapy2.f | 104 + thirdparty/eigen/lapack/slapy3.f | 111 + thirdparty/eigen/lapack/slarf.f | 227 + thirdparty/eigen/lapack/slarfb.f | 763 + thirdparty/eigen/lapack/slarfg.f | 196 + thirdparty/eigen/lapack/slarft.f | 326 + thirdparty/eigen/lapack/svd.cpp | 138 + thirdparty/eigen/lapack/zlacgv.f | 116 + thirdparty/eigen/lapack/zladiv.f | 97 + thirdparty/eigen/lapack/zlarf.f | 232 + thirdparty/eigen/lapack/zlarfb.f | 774 + thirdparty/eigen/lapack/zlarfg.f | 203 + thirdparty/eigen/lapack/zlarft.f | 327 + thirdparty/eigen/scripts/CMakeLists.txt | 6 + thirdparty/eigen/scripts/buildtests.in | 22 + .../eigen/scripts/cdashtesting.cmake.in | 49 + thirdparty/eigen/scripts/check.in | 21 + thirdparty/eigen/scripts/debug.in | 3 + .../eigen/scripts/eigen_gen_credits.cpp | 232 + thirdparty/eigen/scripts/eigen_gen_docs | 24 + thirdparty/eigen/scripts/release.in | 3 + thirdparty/eigen/scripts/relicense.py | 69 + .../eigen/signature_of_eigen3_matrix_library | 1 + thirdparty/eigen/test/CMakeLists.txt | 381 + thirdparty/eigen/test/adjoint.cpp | 200 + thirdparty/eigen/test/array.cpp | 495 + thirdparty/eigen/test/array_for_matrix.cpp | 284 + thirdparty/eigen/test/array_of_string.cpp | 32 + thirdparty/eigen/test/array_replicate.cpp | 82 + thirdparty/eigen/test/array_reverse.cpp | 146 + thirdparty/eigen/test/bandmatrix.cpp | 71 + thirdparty/eigen/test/basicstuff.cpp | 280 + thirdparty/eigen/test/bdcsvd.cpp | 111 + thirdparty/eigen/test/bicgstab.cpp | 34 + thirdparty/eigen/test/block.cpp | 264 + thirdparty/eigen/test/boostmultiprec.cpp | 201 + 
thirdparty/eigen/test/bug1213.cpp | 13 + thirdparty/eigen/test/bug1213.h | 8 + thirdparty/eigen/test/bug1213_main.cpp | 18 + thirdparty/eigen/test/cholesky.cpp | 509 + thirdparty/eigen/test/cholmod_support.cpp | 57 + thirdparty/eigen/test/commainitializer.cpp | 106 + thirdparty/eigen/test/conjugate_gradient.cpp | 34 + thirdparty/eigen/test/conservative_resize.cpp | 134 + thirdparty/eigen/test/corners.cpp | 118 + thirdparty/eigen/test/ctorleak.cpp | 69 + thirdparty/eigen/test/cuda_basic.cu | 173 + thirdparty/eigen/test/cuda_common.h | 101 + thirdparty/eigen/test/denseLM.cpp | 190 + thirdparty/eigen/test/dense_storage.cpp | 76 + thirdparty/eigen/test/determinant.cpp | 67 + thirdparty/eigen/test/diagonal.cpp | 84 + thirdparty/eigen/test/diagonalmatrices.cpp | 129 + thirdparty/eigen/test/dontalign.cpp | 63 + thirdparty/eigen/test/dynalloc.cpp | 175 + thirdparty/eigen/test/eigen2support.cpp | 66 + thirdparty/eigen/test/eigensolver_complex.cpp | 168 + .../test/eigensolver_generalized_real.cpp | 97 + thirdparty/eigen/test/eigensolver_generic.cpp | 157 + .../eigen/test/eigensolver_selfadjoint.cpp | 265 + thirdparty/eigen/test/evaluator_common.h | 0 thirdparty/eigen/test/evaluators.cpp | 499 + thirdparty/eigen/test/exceptions.cpp | 113 + thirdparty/eigen/test/fastmath.cpp | 99 + thirdparty/eigen/test/first_aligned.cpp | 51 + thirdparty/eigen/test/geo_alignedbox.cpp | 189 + thirdparty/eigen/test/geo_eulerangles.cpp | 112 + thirdparty/eigen/test/geo_homogeneous.cpp | 125 + thirdparty/eigen/test/geo_hyperplane.cpp | 195 + thirdparty/eigen/test/geo_orthomethods.cpp | 133 + .../eigen/test/geo_parametrizedline.cpp | 104 + thirdparty/eigen/test/geo_quaternion.cpp | 289 + thirdparty/eigen/test/geo_transformations.cpp | 645 + thirdparty/eigen/test/half_float.cpp | 252 + thirdparty/eigen/test/hessenberg.cpp | 62 + thirdparty/eigen/test/householder.cpp | 138 + thirdparty/eigen/test/incomplete_cholesky.cpp | 65 + .../eigen/test/inplace_decomposition.cpp | 110 + 
thirdparty/eigen/test/integer_types.cpp | 169 + thirdparty/eigen/test/inverse.cpp | 117 + thirdparty/eigen/test/is_same_dense.cpp | 33 + thirdparty/eigen/test/jacobi.cpp | 81 + thirdparty/eigen/test/jacobisvd.cpp | 120 + thirdparty/eigen/test/linearstructure.cpp | 149 + thirdparty/eigen/test/lscg.cpp | 29 + thirdparty/eigen/test/lu.cpp | 281 + thirdparty/eigen/test/main.h | 743 + thirdparty/eigen/test/mapped_matrix.cpp | 211 + thirdparty/eigen/test/mapstaticmethods.cpp | 175 + thirdparty/eigen/test/mapstride.cpp | 181 + thirdparty/eigen/test/meta.cpp | 97 + thirdparty/eigen/test/metis_support.cpp | 25 + thirdparty/eigen/test/miscmatrices.cpp | 47 + thirdparty/eigen/test/mixingtypes.cpp | 300 + thirdparty/eigen/test/mpl2only.cpp | 20 + thirdparty/eigen/test/nesting_ops.cpp | 107 + thirdparty/eigen/test/nomalloc.cpp | 229 + thirdparty/eigen/test/nullary.cpp | 264 + thirdparty/eigen/test/packetmath.cpp | 641 + thirdparty/eigen/test/pardiso_support.cpp | 29 + thirdparty/eigen/test/pastix_support.cpp | 54 + thirdparty/eigen/test/permutationmatrices.cpp | 150 + thirdparty/eigen/test/prec_inverse_4x4.cpp | 83 + thirdparty/eigen/test/product.h | 231 + thirdparty/eigen/test/product_extra.cpp | 375 + thirdparty/eigen/test/product_large.cpp | 107 + thirdparty/eigen/test/product_mmtr.cpp | 76 + thirdparty/eigen/test/product_notemporary.cpp | 155 + thirdparty/eigen/test/product_selfadjoint.cpp | 87 + thirdparty/eigen/test/product_small.cpp | 293 + thirdparty/eigen/test/product_symm.cpp | 94 + thirdparty/eigen/test/product_syrk.cpp | 136 + thirdparty/eigen/test/product_trmm.cpp | 115 + thirdparty/eigen/test/product_trmv.cpp | 91 + thirdparty/eigen/test/product_trsolve.cpp | 101 + thirdparty/eigen/test/qr.cpp | 132 + thirdparty/eigen/test/qr_colpivoting.cpp | 342 + thirdparty/eigen/test/qr_fullpivoting.cpp | 159 + thirdparty/eigen/test/qtvector.cpp | 158 + thirdparty/eigen/test/rand.cpp | 118 + thirdparty/eigen/test/real_qz.cpp | 95 + thirdparty/eigen/test/redux.cpp | 176 + 
thirdparty/eigen/test/ref.cpp | 280 + thirdparty/eigen/test/resize.cpp | 41 + thirdparty/eigen/test/rvalue_types.cpp | 64 + thirdparty/eigen/test/schur_complex.cpp | 91 + thirdparty/eigen/test/schur_real.cpp | 112 + thirdparty/eigen/test/selfadjoint.cpp | 72 + thirdparty/eigen/test/simplicial_cholesky.cpp | 47 + thirdparty/eigen/test/sizeof.cpp | 47 + thirdparty/eigen/test/sizeoverflow.cpp | 64 + thirdparty/eigen/test/smallvectors.cpp | 67 + thirdparty/eigen/test/sparse.h | 210 + thirdparty/eigen/test/sparseLM.cpp | 176 + thirdparty/eigen/test/sparse_basic.cpp | 639 + thirdparty/eigen/test/sparse_block.cpp | 255 + thirdparty/eigen/test/sparse_permutations.cpp | 236 + thirdparty/eigen/test/sparse_product.cpp | 377 + thirdparty/eigen/test/sparse_ref.cpp | 139 + thirdparty/eigen/test/sparse_solver.h | 565 + thirdparty/eigen/test/sparse_solvers.cpp | 112 + thirdparty/eigen/test/sparse_vector.cpp | 163 + thirdparty/eigen/test/sparselu.cpp | 45 + thirdparty/eigen/test/sparseqr.cpp | 106 + thirdparty/eigen/test/special_numbers.cpp | 58 + thirdparty/eigen/test/spqr_support.cpp | 64 + thirdparty/eigen/test/stable_norm.cpp | 192 + thirdparty/eigen/test/stddeque.cpp | 132 + thirdparty/eigen/test/stddeque_overload.cpp | 158 + thirdparty/eigen/test/stdlist.cpp | 132 + thirdparty/eigen/test/stdlist_overload.cpp | 192 + thirdparty/eigen/test/stdvector.cpp | 148 + thirdparty/eigen/test/stdvector_overload.cpp | 161 + thirdparty/eigen/test/superlu_support.cpp | 23 + thirdparty/eigen/test/svd_common.h | 483 + thirdparty/eigen/test/svd_fill.h | 119 + thirdparty/eigen/test/swap.cpp | 94 + thirdparty/eigen/test/triangular.cpp | 247 + thirdparty/eigen/test/umeyama.cpp | 183 + thirdparty/eigen/test/umfpack_support.cpp | 32 + thirdparty/eigen/test/unalignedassert.cpp | 180 + thirdparty/eigen/test/unalignedcount.cpp | 53 + .../eigen/test/upperbidiagonalization.cpp | 43 + thirdparty/eigen/test/vectorization_logic.cpp | 419 + thirdparty/eigen/test/vectorwiseop.cpp | 252 + 
thirdparty/eigen/test/visitor.cpp | 135 + thirdparty/eigen/test/zerosized.cpp | 102 + thirdparty/eigen/unsupported/CMakeLists.txt | 7 + .../eigen/unsupported/Eigen/AdolcForward | 156 + .../eigen/unsupported/Eigen/AlignedVector3 | 224 + .../eigen/unsupported/Eigen/ArpackSupport | 31 + thirdparty/eigen/unsupported/Eigen/AutoDiff | 40 + thirdparty/eigen/unsupported/Eigen/BVH | 95 + .../eigen/unsupported/Eigen/CMakeLists.txt | 32 + .../unsupported/Eigen/CXX11/CMakeLists.txt | 8 + .../eigen/unsupported/Eigen/CXX11/Tensor | 153 + .../unsupported/Eigen/CXX11/TensorSymmetry | 42 + .../eigen/unsupported/Eigen/CXX11/ThreadPool | 65 + .../Eigen/CXX11/src/Tensor/README.md | 1757 ++ .../Eigen/CXX11/src/Tensor/Tensor.h | 527 + .../Eigen/CXX11/src/Tensor/TensorArgMax.h | 299 + .../Eigen/CXX11/src/Tensor/TensorAssign.h | 181 + .../Eigen/CXX11/src/Tensor/TensorBase.h | 1010 + .../CXX11/src/Tensor/TensorBroadcasting.h | 392 + .../Eigen/CXX11/src/Tensor/TensorChipping.h | 384 + .../CXX11/src/Tensor/TensorConcatenation.h | 361 + .../CXX11/src/Tensor/TensorContraction.h | 628 + .../src/Tensor/TensorContractionBlocking.h | 56 + .../CXX11/src/Tensor/TensorContractionCuda.h | 1391 ++ .../src/Tensor/TensorContractionMapper.h | 467 + .../src/Tensor/TensorContractionThreadPool.h | 1052 + .../Eigen/CXX11/src/Tensor/TensorConversion.h | 279 + .../CXX11/src/Tensor/TensorConvolution.h | 1104 ++ .../Eigen/CXX11/src/Tensor/TensorCostModel.h | 212 + .../Eigen/CXX11/src/Tensor/TensorCustomOp.h | 313 + .../Eigen/CXX11/src/Tensor/TensorDevice.h | 68 + .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 341 + .../CXX11/src/Tensor/TensorDeviceDefault.h | 81 + .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 281 + .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 279 + .../CXX11/src/Tensor/TensorDimensionList.h | 236 + .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 428 + .../Eigen/CXX11/src/Tensor/TensorEvalTo.h | 181 + .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 633 + 
.../Eigen/CXX11/src/Tensor/TensorExecutor.h | 288 + .../Eigen/CXX11/src/Tensor/TensorExpr.h | 371 + .../Eigen/CXX11/src/Tensor/TensorFFT.h | 651 + .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 389 + .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 167 + .../src/Tensor/TensorForwardDeclarations.h | 109 + .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 489 + .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 185 + .../CXX11/src/Tensor/TensorGlobalFunctions.h | 33 + .../Eigen/CXX11/src/Tensor/TensorIO.h | 79 + .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 509 + .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 725 + .../Eigen/CXX11/src/Tensor/TensorInflation.h | 229 + .../CXX11/src/Tensor/TensorInitializer.h | 82 + .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 253 + .../Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 209 + .../Eigen/CXX11/src/Tensor/TensorMacros.h | 54 + .../Eigen/CXX11/src/Tensor/TensorMap.h | 321 + .../Eigen/CXX11/src/Tensor/TensorMeta.h | 218 + .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 905 + .../Eigen/CXX11/src/Tensor/TensorPadding.h | 397 + .../Eigen/CXX11/src/Tensor/TensorPatch.h | 269 + .../Eigen/CXX11/src/Tensor/TensorRandom.h | 276 + .../Eigen/CXX11/src/Tensor/TensorReduction.h | 781 + .../CXX11/src/Tensor/TensorReductionCuda.h | 750 + .../CXX11/src/Tensor/TensorReductionSycl.h | 239 + .../Eigen/CXX11/src/Tensor/TensorRef.h | 429 + .../Eigen/CXX11/src/Tensor/TensorReverse.h | 288 + .../Eigen/CXX11/src/Tensor/TensorScan.h | 287 + .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 264 + .../Eigen/CXX11/src/Tensor/TensorStorage.h | 146 + .../Eigen/CXX11/src/Tensor/TensorStriding.h | 338 + .../Eigen/CXX11/src/Tensor/TensorSycl.h | 82 + .../TensorSyclConvertToDeviceExpression.h | 131 + .../src/Tensor/TensorSyclExprConstructor.h | 264 + .../src/Tensor/TensorSyclExtractAccessor.h | 217 + .../src/Tensor/TensorSyclExtractFunctors.h | 194 + .../CXX11/src/Tensor/TensorSyclLeafCount.h | 123 + .../src/Tensor/TensorSyclPlaceHolderExpr.h | 195 + 
.../Eigen/CXX11/src/Tensor/TensorSyclRun.h | 64 + .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 234 + .../Eigen/CXX11/src/Tensor/TensorTraits.h | 272 + .../Eigen/CXX11/src/Tensor/TensorUInt128.h | 248 + .../CXX11/src/Tensor/TensorVolumePatch.h | 608 + .../src/TensorSymmetry/DynamicSymmetry.h | 293 + .../CXX11/src/TensorSymmetry/StaticSymmetry.h | 236 + .../Eigen/CXX11/src/TensorSymmetry/Symmetry.h | 338 + .../TensorSymmetry/util/TemplateGroupTheory.h | 666 + .../Eigen/CXX11/src/ThreadPool/EventCount.h | 233 + .../src/ThreadPool/NonBlockingThreadPool.h | 274 + .../Eigen/CXX11/src/ThreadPool/RunQueue.h | 210 + .../CXX11/src/ThreadPool/SimpleThreadPool.h | 154 + .../CXX11/src/ThreadPool/ThreadEnvironment.h | 38 + .../Eigen/CXX11/src/ThreadPool/ThreadLocal.h | 22 + .../src/ThreadPool/ThreadPoolInterface.h | 33 + .../Eigen/CXX11/src/ThreadPool/ThreadYield.h | 20 + .../Eigen/CXX11/src/util/CXX11Meta.h | 542 + .../Eigen/CXX11/src/util/CXX11Workarounds.h | 88 + .../Eigen/CXX11/src/util/EmulateArray.h | 267 + .../Eigen/CXX11/src/util/EmulateCXX11Meta.h | 311 + .../Eigen/CXX11/src/util/MaxSizeVector.h | 141 + .../eigen/unsupported/Eigen/EulerAngles | 43 + thirdparty/eigen/unsupported/Eigen/FFT | 418 + .../eigen/unsupported/Eigen/IterativeSolvers | 42 + .../eigen/unsupported/Eigen/KroneckerProduct | 36 + .../unsupported/Eigen/LevenbergMarquardt | 45 + .../eigen/unsupported/Eigen/MPRealSupport | 209 + .../eigen/unsupported/Eigen/MatrixFunctions | 501 + .../eigen/unsupported/Eigen/MoreVectorization | 24 + .../unsupported/Eigen/NonLinearOptimization | 134 + .../eigen/unsupported/Eigen/NumericalDiff | 56 + .../eigen/unsupported/Eigen/OpenGLSupport | 322 + .../eigen/unsupported/Eigen/Polynomials | 138 + thirdparty/eigen/unsupported/Eigen/Skyline | 39 + .../eigen/unsupported/Eigen/SparseExtra | 53 + .../eigen/unsupported/Eigen/SpecialFunctions | 63 + thirdparty/eigen/unsupported/Eigen/Splines | 31 + .../Eigen/src/AutoDiff/AutoDiffJacobian.h | 108 + 
.../Eigen/src/AutoDiff/AutoDiffScalar.h | 684 + .../Eigen/src/AutoDiff/AutoDiffVector.h | 220 + .../unsupported/Eigen/src/BVH/BVAlgorithms.h | 293 + .../eigen/unsupported/Eigen/src/BVH/KdBVH.h | 222 + .../ArpackSelfAdjointEigenSolver.h | 805 + .../Eigen/src/EulerAngles/CMakeLists.txt | 6 + .../Eigen/src/EulerAngles/EulerAngles.h | 386 + .../Eigen/src/EulerAngles/EulerSystem.h | 326 + .../unsupported/Eigen/src/FFT/ei_fftw_impl.h | 261 + .../Eigen/src/FFT/ei_kissfft_impl.h | 420 + .../IterativeSolvers/ConstrainedConjGrad.h | 189 + .../Eigen/src/IterativeSolvers/DGMRES.h | 513 + .../Eigen/src/IterativeSolvers/GMRES.h | 343 + .../Eigen/src/IterativeSolvers/IncompleteLU.h | 90 + .../IterativeSolvers/IterationController.h | 154 + .../Eigen/src/IterativeSolvers/MINRES.h | 289 + .../Eigen/src/IterativeSolvers/Scaling.h | 187 + .../KroneckerProduct/KroneckerTensorProduct.h | 305 + .../LevenbergMarquardt/CopyrightMINPACK.txt | 52 + .../Eigen/src/LevenbergMarquardt/LMcovar.h | 84 + .../Eigen/src/LevenbergMarquardt/LMonestep.h | 202 + .../Eigen/src/LevenbergMarquardt/LMpar.h | 160 + .../Eigen/src/LevenbergMarquardt/LMqrsolv.h | 188 + .../LevenbergMarquardt/LevenbergMarquardt.h | 396 + .../src/MatrixFunctions/MatrixExponential.h | 422 + .../src/MatrixFunctions/MatrixFunction.h | 581 + .../src/MatrixFunctions/MatrixLogarithm.h | 373 + .../Eigen/src/MatrixFunctions/MatrixPower.h | 709 + .../src/MatrixFunctions/MatrixSquareRoot.h | 370 + .../Eigen/src/MatrixFunctions/StemFunction.h | 117 + .../src/MoreVectorization/MathFunctions.h | 95 + .../HybridNonLinearSolver.h | 601 + .../LevenbergMarquardt.h | 657 + .../Eigen/src/NonLinearOptimization/chkder.h | 66 + .../Eigen/src/NonLinearOptimization/covar.h | 70 + .../Eigen/src/NonLinearOptimization/dogleg.h | 107 + .../Eigen/src/NonLinearOptimization/fdjac1.h | 79 + .../Eigen/src/NonLinearOptimization/lmpar.h | 298 + .../Eigen/src/NonLinearOptimization/qrsolv.h | 91 + .../Eigen/src/NonLinearOptimization/r1mpyq.h | 30 + 
.../Eigen/src/NonLinearOptimization/r1updt.h | 99 + .../Eigen/src/NonLinearOptimization/rwupdt.h | 49 + .../Eigen/src/NumericalDiff/NumericalDiff.h | 130 + .../Eigen/src/Polynomials/Companion.h | 276 + .../Eigen/src/Polynomials/PolynomialSolver.h | 406 + .../Eigen/src/Polynomials/PolynomialUtils.h | 143 + .../Eigen/src/Skyline/SkylineInplaceLU.h | 352 + .../Eigen/src/Skyline/SkylineMatrix.h | 862 + .../Eigen/src/Skyline/SkylineMatrixBase.h | 212 + .../Eigen/src/Skyline/SkylineProduct.h | 295 + .../Eigen/src/Skyline/SkylineStorage.h | 259 + .../Eigen/src/Skyline/SkylineUtil.h | 89 + .../SparseExtra/BlockOfDynamicSparseMatrix.h | 122 + .../Eigen/src/SparseExtra/BlockSparseMatrix.h | 1079 + .../src/SparseExtra/DynamicSparseMatrix.h | 392 + .../Eigen/src/SparseExtra/MarketIO.h | 274 + .../src/SparseExtra/MatrixMarketIterator.h | 247 + .../Eigen/src/SparseExtra/RandomSetter.h | 327 + .../SpecialFunctionsArrayAPI.h | 124 + .../SpecialFunctionsFunctors.h | 236 + .../SpecialFunctions/SpecialFunctionsHalf.h | 47 + .../SpecialFunctions/SpecialFunctionsImpl.h | 1565 ++ .../SpecialFunctionsPacketMath.h | 58 + .../arch/CUDA/CudaSpecialFunctions.h | 165 + .../unsupported/Eigen/src/Splines/Spline.h | 512 + .../Eigen/src/Splines/SplineFitting.h | 430 + .../unsupported/Eigen/src/Splines/SplineFwd.h | 93 + thirdparty/eigen/unsupported/README.txt | 50 + .../eigen/unsupported/bench/bench_svd.cpp | 123 + .../eigen/unsupported/doc/CMakeLists.txt | 4 + thirdparty/eigen/unsupported/doc/Overview.dox | 28 + .../unsupported/doc/eigendoxy_layout.xml.in | 177 + .../unsupported/doc/examples/BVH_Example.cpp | 50 + .../unsupported/doc/examples/CMakeLists.txt | 20 + .../unsupported/doc/examples/EulerAngles.cpp | 46 + .../eigen/unsupported/doc/examples/FFT.cpp | 118 + .../doc/examples/MatrixExponential.cpp | 16 + .../doc/examples/MatrixFunction.cpp | 23 + .../doc/examples/MatrixLogarithm.cpp | 15 + .../unsupported/doc/examples/MatrixPower.cpp | 16 + .../doc/examples/MatrixPower_optimal.cpp | 17 + 
.../unsupported/doc/examples/MatrixSine.cpp | 20 + .../unsupported/doc/examples/MatrixSinh.cpp | 20 + .../doc/examples/MatrixSquareRoot.cpp | 16 + .../doc/examples/PolynomialSolver1.cpp | 53 + .../doc/examples/PolynomialUtils1.cpp | 20 + .../unsupported/doc/snippets/CMakeLists.txt | 26 + thirdparty/eigen/unsupported/test/BVH.cpp | 222 + .../eigen/unsupported/test/CMakeLists.txt | 259 + .../eigen/unsupported/test/EulerAngles.cpp | 208 + thirdparty/eigen/unsupported/test/FFT.cpp | 2 + thirdparty/eigen/unsupported/test/FFTW.cpp | 262 + .../test/NonLinearOptimization.cpp | 1878 ++ .../eigen/unsupported/test/NumericalDiff.cpp | 114 + .../eigen/unsupported/test/alignedvector3.cpp | 84 + .../eigen/unsupported/test/autodiff.cpp | 367 + .../unsupported/test/autodiff_scalar.cpp | 83 + .../unsupported/test/cxx11_eventcount.cpp | 142 + .../eigen/unsupported/test/cxx11_meta.cpp | 357 + .../test/cxx11_non_blocking_thread_pool.cpp | 107 + .../eigen/unsupported/test/cxx11_runqueue.cpp | 235 + .../unsupported/test/cxx11_tensor_argmax.cpp | 294 + .../test/cxx11_tensor_argmax_cuda.cu | 254 + .../unsupported/test/cxx11_tensor_assign.cpp | 370 + .../test/cxx11_tensor_broadcast_sycl.cpp | 150 + .../test/cxx11_tensor_broadcasting.cpp | 194 + .../test/cxx11_tensor_builtins_sycl.cpp | 272 + .../test/cxx11_tensor_cast_float16_cuda.cu | 82 + .../unsupported/test/cxx11_tensor_casts.cpp | 115 + .../test/cxx11_tensor_chipping.cpp | 425 + .../test/cxx11_tensor_comparisons.cpp | 84 + .../test/cxx11_tensor_complex_cuda.cu | 153 + .../cxx11_tensor_complex_cwise_ops_cuda.cu | 97 + .../test/cxx11_tensor_concatenation.cpp | 137 + .../unsupported/test/cxx11_tensor_const.cpp | 62 + .../test/cxx11_tensor_contract_cuda.cu | 216 + .../test/cxx11_tensor_contraction.cpp | 545 + .../test/cxx11_tensor_convolution.cpp | 149 + .../unsupported/test/cxx11_tensor_cuda.cu | 1287 ++ .../test/cxx11_tensor_custom_index.cpp | 100 + .../test/cxx11_tensor_custom_op.cpp | 111 + .../unsupported/test/cxx11_tensor_device.cu | 
390 + .../test/cxx11_tensor_device_sycl.cpp | 77 + .../test/cxx11_tensor_dimension.cpp | 69 + .../unsupported/test/cxx11_tensor_empty.cpp | 40 + .../unsupported/test/cxx11_tensor_expr.cpp | 314 + .../unsupported/test/cxx11_tensor_fft.cpp | 273 + .../test/cxx11_tensor_fixed_size.cpp | 261 + .../test/cxx11_tensor_forced_eval.cpp | 79 + .../test/cxx11_tensor_forced_eval_sycl.cpp | 81 + .../test/cxx11_tensor_generator.cpp | 91 + .../unsupported/test/cxx11_tensor_ifft.cpp | 154 + .../test/cxx11_tensor_image_patch.cpp | 757 + .../test/cxx11_tensor_index_list.cpp | 386 + .../test/cxx11_tensor_inflation.cpp | 81 + .../unsupported/test/cxx11_tensor_intdiv.cpp | 147 + .../unsupported/test/cxx11_tensor_io.cpp | 136 + .../test/cxx11_tensor_layout_swap.cpp | 61 + .../unsupported/test/cxx11_tensor_lvalue.cpp | 42 + .../unsupported/test/cxx11_tensor_map.cpp | 277 + .../unsupported/test/cxx11_tensor_math.cpp | 46 + .../test/cxx11_tensor_mixed_indices.cpp | 53 + .../test/cxx11_tensor_morphing.cpp | 485 + .../test/cxx11_tensor_morphing_sycl.cpp | 95 + .../test/cxx11_tensor_notification.cpp | 81 + .../test/cxx11_tensor_of_complex.cpp | 103 + .../test/cxx11_tensor_of_const_values.cpp | 105 + .../test/cxx11_tensor_of_float16_cuda.cu | 494 + .../test/cxx11_tensor_of_strings.cpp | 152 + .../unsupported/test/cxx11_tensor_padding.cpp | 93 + .../unsupported/test/cxx11_tensor_patch.cpp | 172 + .../unsupported/test/cxx11_tensor_random.cpp | 78 + .../test/cxx11_tensor_random_cuda.cu | 88 + .../test/cxx11_tensor_reduction.cpp | 508 + .../test/cxx11_tensor_reduction_cuda.cu | 157 + .../test/cxx11_tensor_reduction_sycl.cpp | 147 + .../unsupported/test/cxx11_tensor_ref.cpp | 248 + .../unsupported/test/cxx11_tensor_reverse.cpp | 190 + .../test/cxx11_tensor_roundings.cpp | 62 + .../unsupported/test/cxx11_tensor_scan.cpp | 110 + .../test/cxx11_tensor_scan_cuda.cu | 79 + .../test/cxx11_tensor_shuffling.cpp | 228 + .../unsupported/test/cxx11_tensor_simple.cpp | 327 + .../test/cxx11_tensor_striding.cpp 
| 119 + .../unsupported/test/cxx11_tensor_sugar.cpp | 81 + .../unsupported/test/cxx11_tensor_sycl.cpp | 207 + .../test/cxx11_tensor_symmetry.cpp | 818 + .../test/cxx11_tensor_thread_pool.cpp | 373 + .../unsupported/test/cxx11_tensor_uint128.cpp | 160 + .../test/cxx11_tensor_volume_patch.cpp | 112 + thirdparty/eigen/unsupported/test/dgmres.cpp | 31 + .../eigen/unsupported/test/forward_adolc.cpp | 141 + thirdparty/eigen/unsupported/test/gmres.cpp | 31 + .../unsupported/test/kronecker_product.cpp | 252 + .../unsupported/test/levenberg_marquardt.cpp | 1477 ++ .../unsupported/test/matrix_exponential.cpp | 141 + .../unsupported/test/matrix_function.cpp | 193 + .../eigen/unsupported/test/matrix_functions.h | 67 + .../eigen/unsupported/test/matrix_power.cpp | 204 + .../unsupported/test/matrix_square_root.cpp | 31 + thirdparty/eigen/unsupported/test/minres.cpp | 44 + .../eigen/unsupported/test/mpreal/mpreal.h | 3104 +++ .../eigen/unsupported/test/mpreal_support.cpp | 65 + .../eigen/unsupported/test/openglsupport.cpp | 337 + .../unsupported/test/polynomialsolver.cpp | 218 + .../unsupported/test/polynomialutils.cpp | 113 + .../eigen/unsupported/test/sparse_extra.cpp | 147 + .../unsupported/test/special_functions.cpp | 345 + thirdparty/eigen/unsupported/test/splines.cpp | 281 + 1778 files changed, 314912 insertions(+), 30957 deletions(-) create mode 100644 thirdparty/eigen/.hgeol create mode 100644 thirdparty/eigen/.hgignore create mode 100644 thirdparty/eigen/.hgtags create mode 100644 thirdparty/eigen/CMakeLists.txt create mode 100644 thirdparty/eigen/COPYING.BSD create mode 100644 thirdparty/eigen/COPYING.GPL create mode 100644 thirdparty/eigen/COPYING.LGPL create mode 100644 thirdparty/eigen/COPYING.MINPACK create mode 100644 thirdparty/eigen/COPYING.MPL2 create mode 100644 thirdparty/eigen/COPYING.README create mode 100644 thirdparty/eigen/CTestConfig.cmake create mode 100644 thirdparty/eigen/CTestCustom.cmake.in create mode 100644 thirdparty/eigen/Eigen/CMakeLists.txt 
create mode 100644 thirdparty/eigen/Eigen/Cholesky create mode 100644 thirdparty/eigen/Eigen/CholmodSupport create mode 100644 thirdparty/eigen/Eigen/Core create mode 100644 thirdparty/eigen/Eigen/Dense create mode 100644 thirdparty/eigen/Eigen/Eigen create mode 100644 thirdparty/eigen/Eigen/Eigenvalues create mode 100644 thirdparty/eigen/Eigen/Geometry create mode 100644 thirdparty/eigen/Eigen/Householder create mode 100644 thirdparty/eigen/Eigen/IterativeLinearSolvers create mode 100644 thirdparty/eigen/Eigen/Jacobi create mode 100644 thirdparty/eigen/Eigen/LU create mode 100644 thirdparty/eigen/Eigen/MetisSupport create mode 100644 thirdparty/eigen/Eigen/OrderingMethods create mode 100644 thirdparty/eigen/Eigen/PaStiXSupport create mode 100755 thirdparty/eigen/Eigen/PardisoSupport create mode 100644 thirdparty/eigen/Eigen/QR create mode 100644 thirdparty/eigen/Eigen/QtAlignedMalloc create mode 100644 thirdparty/eigen/Eigen/SPQRSupport create mode 100644 thirdparty/eigen/Eigen/SVD create mode 100644 thirdparty/eigen/Eigen/Sparse create mode 100644 thirdparty/eigen/Eigen/SparseCholesky create mode 100644 thirdparty/eigen/Eigen/SparseCore create mode 100644 thirdparty/eigen/Eigen/SparseLU create mode 100644 thirdparty/eigen/Eigen/SparseQR create mode 100644 thirdparty/eigen/Eigen/StdDeque create mode 100644 thirdparty/eigen/Eigen/StdList create mode 100644 thirdparty/eigen/Eigen/StdVector create mode 100644 thirdparty/eigen/Eigen/SuperLUSupport create mode 100644 thirdparty/eigen/Eigen/UmfPackSupport create mode 100644 thirdparty/eigen/Eigen/src/Cholesky/LDLT.h create mode 100644 thirdparty/eigen/Eigen/src/Cholesky/LLT.h create mode 100644 thirdparty/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/CholmodSupport/CholmodSupport.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Array.h create mode 100644 thirdparty/eigen/Eigen/src/Core/ArrayBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/ArrayWrapper.h create mode 
100644 thirdparty/eigen/Eigen/src/Core/Assign.h create mode 100644 thirdparty/eigen/Eigen/src/Core/AssignEvaluator.h create mode 100755 thirdparty/eigen/Eigen/src/Core/Assign_MKL.h create mode 100644 thirdparty/eigen/Eigen/src/Core/BandMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Block.h create mode 100644 thirdparty/eigen/Eigen/src/Core/BooleanRedux.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CommaInitializer.h create mode 100644 thirdparty/eigen/Eigen/src/Core/ConditionEstimator.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CoreEvaluators.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CoreIterators.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CwiseBinaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CwiseNullaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CwiseTernaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CwiseUnaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/Core/CwiseUnaryView.h create mode 100644 thirdparty/eigen/Eigen/src/Core/DenseBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/DenseCoeffsBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/DenseStorage.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Diagonal.h create mode 100644 thirdparty/eigen/Eigen/src/Core/DiagonalMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/DiagonalProduct.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Dot.h create mode 100644 thirdparty/eigen/Eigen/src/Core/EigenBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/ForceAlignedAccess.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Fuzzy.h create mode 100644 thirdparty/eigen/Eigen/src/Core/GeneralProduct.h create mode 100644 thirdparty/eigen/Eigen/src/Core/GenericPacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/GlobalFunctions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/IO.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Inverse.h create mode 100644 
thirdparty/eigen/Eigen/src/Core/Map.h create mode 100644 thirdparty/eigen/Eigen/src/Core/MapBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/MathFunctions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/MathFunctionsImpl.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Matrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/MatrixBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/NestByValue.h create mode 100644 thirdparty/eigen/Eigen/src/Core/NoAlias.h create mode 100644 thirdparty/eigen/Eigen/src/Core/NumTraits.h create mode 100644 thirdparty/eigen/Eigen/src/Core/PermutationMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/PlainObjectBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Product.h create mode 100644 thirdparty/eigen/Eigen/src/Core/ProductEvaluators.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Random.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Redux.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Ref.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Replicate.h create mode 100644 thirdparty/eigen/Eigen/src/Core/ReturnByValue.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Reverse.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Select.h create mode 100644 thirdparty/eigen/Eigen/src/Core/SelfAdjointView.h create mode 100644 thirdparty/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Solve.h create mode 100644 thirdparty/eigen/Eigen/src/Core/SolveTriangular.h create mode 100644 thirdparty/eigen/Eigen/src/Core/SolverBase.h create mode 100644 thirdparty/eigen/Eigen/src/Core/StableNorm.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Stride.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Swap.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Transpose.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Transpositions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/TriangularMatrix.h create mode 100644 
thirdparty/eigen/Eigen/src/Core/VectorBlock.h create mode 100644 thirdparty/eigen/Eigen/src/Core/VectorwiseOp.h create mode 100644 thirdparty/eigen/Eigen/src/Core/Visitor.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AVX/Complex.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AVX/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AltiVec/Complex.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h create mode 100755 thirdparty/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/CUDA/Complex.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/CUDA/Half.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/Default/Settings.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/NEON/Complex.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/NEON/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/SSE/Complex.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h create mode 100755 thirdparty/eigen/Eigen/src/Core/arch/SSE/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h create mode 100644 thirdparty/eigen/Eigen/src/Core/arch/ZVector/Complex.h create mode 100644 
thirdparty/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h create mode 100755 thirdparty/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h create mode 100644 thirdparty/eigen/Eigen/src/Core/functors/AssignmentFunctors.h create mode 100644 thirdparty/eigen/Eigen/src/Core/functors/BinaryFunctors.h create mode 100644 thirdparty/eigen/Eigen/src/Core/functors/NullaryFunctors.h create mode 100644 thirdparty/eigen/Eigen/src/Core/functors/StlFunctors.h create mode 100644 thirdparty/eigen/Eigen/src/Core/functors/TernaryFunctors.h create mode 100644 thirdparty/eigen/Eigen/src/Core/functors/UnaryFunctors.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralMatrixVector.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/Parallelizer.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/SelfadjointProduct.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h create mode 
100644 thirdparty/eigen/Eigen/src/Core/products/TriangularMatrixVector.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h create mode 100644 thirdparty/eigen/Eigen/src/Core/products/TriangularSolverVector.h create mode 100755 thirdparty/eigen/Eigen/src/Core/util/BlasUtil.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/Constants.h create mode 100755 thirdparty/eigen/Eigen/src/Core/util/DisableStupidWarnings.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/ForwardDeclarations.h create mode 100755 thirdparty/eigen/Eigen/src/Core/util/MKL_support.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/Macros.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/Memory.h create mode 100755 thirdparty/eigen/Eigen/src/Core/util/Meta.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/NonMPL2.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/StaticAssert.h create mode 100644 thirdparty/eigen/Eigen/src/Core/util/XprHelper.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/ComplexSchur.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/EigenSolver.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/RealQZ.h create mode 100644 
thirdparty/eigen/Eigen/src/Eigenvalues/RealSchur.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/AlignedBox.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/AngleAxis.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/EulerAngles.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Homogeneous.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Hyperplane.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/OrthoMethods.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/ParametrizedLine.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Quaternion.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Rotation2D.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/RotationBase.h create mode 100755 thirdparty/eigen/Eigen/src/Geometry/Scaling.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Transform.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Translation.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/Umeyama.h create mode 100644 thirdparty/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h create mode 100644 thirdparty/eigen/Eigen/src/Householder/BlockHouseholder.h create mode 100644 thirdparty/eigen/Eigen/src/Householder/Householder.h create mode 100644 thirdparty/eigen/Eigen/src/Householder/HouseholderSequence.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h 
create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h create mode 100644 thirdparty/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h create mode 100644 thirdparty/eigen/Eigen/src/Jacobi/Jacobi.h create mode 100644 thirdparty/eigen/Eigen/src/LU/Determinant.h create mode 100644 thirdparty/eigen/Eigen/src/LU/FullPivLU.h create mode 100644 thirdparty/eigen/Eigen/src/LU/InverseImpl.h create mode 100644 thirdparty/eigen/Eigen/src/LU/PartialPivLU.h create mode 100644 thirdparty/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/LU/arch/Inverse_SSE.h create mode 100644 thirdparty/eigen/Eigen/src/MetisSupport/MetisSupport.h create mode 100644 thirdparty/eigen/Eigen/src/OrderingMethods/Amd.h create mode 100644 thirdparty/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h create mode 100644 thirdparty/eigen/Eigen/src/OrderingMethods/Ordering.h create mode 100644 thirdparty/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h create mode 100644 thirdparty/eigen/Eigen/src/PardisoSupport/PardisoSupport.h create mode 100644 thirdparty/eigen/Eigen/src/QR/ColPivHouseholderQR.h create mode 100644 thirdparty/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h create mode 100644 thirdparty/eigen/Eigen/src/QR/FullPivHouseholderQR.h create mode 100644 thirdparty/eigen/Eigen/src/QR/HouseholderQR.h create mode 100644 thirdparty/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h create mode 100644 thirdparty/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h create mode 100644 thirdparty/eigen/Eigen/src/SVD/BDCSVD.h create mode 100644 thirdparty/eigen/Eigen/src/SVD/JacobiSVD.h create mode 100644 thirdparty/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h create mode 100644 
thirdparty/eigen/Eigen/src/SVD/SVDBase.h create mode 100644 thirdparty/eigen/Eigen/src/SVD/UpperBidiagonalization.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/AmbiVector.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/CompressedStorage.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseAssign.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseBlock.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseColEtree.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseCompressedBase.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseDenseProduct.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseDot.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseFuzzy.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseMap.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseMatrixBase.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparsePermutation.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseProduct.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseRedux.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseRef.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h create mode 100644 
thirdparty/eigen/Eigen/src/SparseCore/SparseSolverBase.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseTranspose.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseTriangularView.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseUtil.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseVector.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/SparseView.h create mode 100644 thirdparty/eigen/Eigen/src/SparseCore/TriangularSolver.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLUImpl.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_Memory.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_Structs.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_Utils.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h create mode 100644 thirdparty/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h create mode 100644 thirdparty/eigen/Eigen/src/SparseQR/SparseQR.h create mode 100644 
thirdparty/eigen/Eigen/src/StlSupport/StdDeque.h create mode 100644 thirdparty/eigen/Eigen/src/StlSupport/StdList.h create mode 100644 thirdparty/eigen/Eigen/src/StlSupport/StdVector.h create mode 100644 thirdparty/eigen/Eigen/src/StlSupport/details.h create mode 100644 thirdparty/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h create mode 100644 thirdparty/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h create mode 100644 thirdparty/eigen/Eigen/src/misc/Image.h create mode 100644 thirdparty/eigen/Eigen/src/misc/Kernel.h create mode 100644 thirdparty/eigen/Eigen/src/misc/RealSvd2x2.h create mode 100644 thirdparty/eigen/Eigen/src/misc/blas.h create mode 100644 thirdparty/eigen/Eigen/src/misc/lapack.h create mode 100755 thirdparty/eigen/Eigen/src/misc/lapacke.h create mode 100644 thirdparty/eigen/Eigen/src/misc/lapacke_mangling.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/BlockMethods.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h create mode 100644 thirdparty/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h create mode 100644 thirdparty/eigen/INSTALL create mode 100644 thirdparty/eigen/README.md create mode 100644 thirdparty/eigen/bench/BenchSparseUtil.h create mode 100644 thirdparty/eigen/bench/BenchTimer.h create mode 100644 thirdparty/eigen/bench/BenchUtil.h create mode 100644 thirdparty/eigen/bench/README.txt create mode 100644 thirdparty/eigen/bench/analyze-blocking-sizes.cpp create mode 100644 thirdparty/eigen/bench/basicbench.cxxlist create mode 100644 thirdparty/eigen/bench/basicbenchmark.cpp create mode 100644 thirdparty/eigen/bench/basicbenchmark.h create mode 100644 thirdparty/eigen/bench/benchBlasGemm.cpp create mode 
100644 thirdparty/eigen/bench/benchCholesky.cpp create mode 100644 thirdparty/eigen/bench/benchEigenSolver.cpp create mode 100644 thirdparty/eigen/bench/benchFFT.cpp create mode 100644 thirdparty/eigen/bench/benchGeometry.cpp create mode 100644 thirdparty/eigen/bench/benchVecAdd.cpp create mode 100644 thirdparty/eigen/bench/bench_gemm.cpp create mode 100755 thirdparty/eigen/bench/bench_multi_compilers.sh create mode 100644 thirdparty/eigen/bench/bench_norm.cpp create mode 100644 thirdparty/eigen/bench/bench_reverse.cpp create mode 100644 thirdparty/eigen/bench/bench_sum.cpp create mode 100755 thirdparty/eigen/bench/bench_unrolling create mode 100644 thirdparty/eigen/bench/benchmark-blocking-sizes.cpp create mode 100644 thirdparty/eigen/bench/benchmark.cpp create mode 100644 thirdparty/eigen/bench/benchmarkSlice.cpp create mode 100644 thirdparty/eigen/bench/benchmarkX.cpp create mode 100644 thirdparty/eigen/bench/benchmarkXcwise.cpp create mode 100755 thirdparty/eigen/bench/benchmark_suite create mode 100644 thirdparty/eigen/bench/btl/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/COPYING create mode 100644 thirdparty/eigen/bench/btl/README create mode 100644 thirdparty/eigen/bench/btl/actions/action_aat_product.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_ata_product.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_atv_product.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_axpby.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_axpy.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_cholesky.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_ger.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_hessenberg.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_lu_decomp.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_lu_solve.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product.hh create mode 
100644 thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product_bis.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_matrix_vector_product.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_partial_lu.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_rot.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_symv.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_syr2.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_trisolve.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_trisolve_matrix.hh create mode 100644 thirdparty/eigen/bench/btl/actions/action_trmm.hh create mode 100644 thirdparty/eigen/bench/btl/actions/basic_actions.hh create mode 100644 thirdparty/eigen/bench/btl/cmake/FindACML.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindATLAS.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindBLAZE.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindBlitz.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindCBLAS.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindGMM.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindMKL.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindMTL4.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindOPENBLAS.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindPackageHandleStandardArgs.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/FindTvmet.cmake create mode 100644 thirdparty/eigen/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake create mode 100644 thirdparty/eigen/bench/btl/data/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/data/action_settings.txt create mode 100644 thirdparty/eigen/bench/btl/data/gnuplot_common_settings.hh create mode 100755 thirdparty/eigen/bench/btl/data/go_mean create mode 100644 thirdparty/eigen/bench/btl/data/mean.cxx create mode 100644 thirdparty/eigen/bench/btl/data/mk_gnuplot_script.sh 
create mode 100644 thirdparty/eigen/bench/btl/data/mk_mean_script.sh create mode 100755 thirdparty/eigen/bench/btl/data/mk_new_gnuplot.sh create mode 100644 thirdparty/eigen/bench/btl/data/perlib_plot_settings.txt create mode 100644 thirdparty/eigen/bench/btl/data/regularize.cxx create mode 100644 thirdparty/eigen/bench/btl/data/smooth.cxx create mode 100755 thirdparty/eigen/bench/btl/data/smooth_all.sh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/bench.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/bench_parameter.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/btl.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/init/init_function.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/init/init_matrix.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/init/init_vector.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/static/bench_static.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/static/intel_bench_fixed_size.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/static/static_size_generator.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/STL_perf_analyzer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/STL_timer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/portable_perf_analyzer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/portable_perf_analyzer_old.hh create mode 100755 thirdparty/eigen/bench/btl/generic_bench/timers/portable_timer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/x86_perf_analyzer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/timers/x86_timer.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/utils/size_lin_log.hh create mode 100644 thirdparty/eigen/bench/btl/generic_bench/utils/size_log.hh create 
mode 100644 thirdparty/eigen/bench/btl/generic_bench/utils/utilities.h create mode 100644 thirdparty/eigen/bench/btl/generic_bench/utils/xy_file.hh create mode 100644 thirdparty/eigen/bench/btl/libs/BLAS/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/BLAS/blas.h create mode 100644 thirdparty/eigen/bench/btl/libs/BLAS/blas_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh create mode 100644 thirdparty/eigen/bench/btl/libs/BLAS/c_interface_base.h create mode 100644 thirdparty/eigen/bench/btl/libs/BLAS/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/STL/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/STL/STL_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/STL/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/blaze/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/blaze/blaze_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/blaze/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/blitz/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/blitz/blitz_LU_solve_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/blitz/blitz_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/blitz/btl_blitz.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/blitz/btl_tiny_blitz.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/blitz/tiny_blitz_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/btl_tiny_eigen2.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/eigen2_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/main_adv.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/main_linear.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/main_matmat.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen2/main_vecmat.cpp create mode 100644 
thirdparty/eigen/bench/btl/libs/eigen3/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/eigen3/btl_tiny_eigen3.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen3/eigen3_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/eigen3/main_adv.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen3/main_linear.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen3/main_matmat.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/eigen3/main_vecmat.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/gmm/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/gmm/gmm_LU_solve_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/gmm/gmm_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/gmm/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/mtl4/.kdbgrc.main create mode 100644 thirdparty/eigen/bench/btl/libs/mtl4/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/mtl4/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/mtl4/mtl4_LU_solve_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/mtl4/mtl4_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/tensors/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/tensors/main_linear.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/tensors/main_matmat.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/tensors/main_vecmat.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/tensors/tensor_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/tvmet/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/tvmet/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/tvmet/tvmet_interface.hh create mode 100644 thirdparty/eigen/bench/btl/libs/ublas/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/btl/libs/ublas/main.cpp create mode 100644 thirdparty/eigen/bench/btl/libs/ublas/ublas_interface.hh create mode 100644 
thirdparty/eigen/bench/check_cache_queries.cpp create mode 100644 thirdparty/eigen/bench/dense_solvers.cpp create mode 100644 thirdparty/eigen/bench/eig33.cpp create mode 100644 thirdparty/eigen/bench/geometry.cpp create mode 100644 thirdparty/eigen/bench/perf_monitoring/gemm/changesets.txt create mode 100644 thirdparty/eigen/bench/perf_monitoring/gemm/gemm.cpp create mode 100644 thirdparty/eigen/bench/perf_monitoring/gemm/gemm_settings.txt create mode 100644 thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp create mode 100644 thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt create mode 100755 thirdparty/eigen/bench/perf_monitoring/gemm/make_plot.sh create mode 100755 thirdparty/eigen/bench/perf_monitoring/gemm/run.sh create mode 100644 thirdparty/eigen/bench/product_threshold.cpp create mode 100644 thirdparty/eigen/bench/quat_slerp.cpp create mode 100644 thirdparty/eigen/bench/quatmul.cpp create mode 100644 thirdparty/eigen/bench/sparse_cholesky.cpp create mode 100644 thirdparty/eigen/bench/sparse_dense_product.cpp create mode 100644 thirdparty/eigen/bench/sparse_lu.cpp create mode 100644 thirdparty/eigen/bench/sparse_product.cpp create mode 100644 thirdparty/eigen/bench/sparse_randomsetter.cpp create mode 100644 thirdparty/eigen/bench/sparse_setter.cpp create mode 100644 thirdparty/eigen/bench/sparse_transpose.cpp create mode 100644 thirdparty/eigen/bench/sparse_trisolver.cpp create mode 100644 thirdparty/eigen/bench/spbench/CMakeLists.txt create mode 100644 thirdparty/eigen/bench/spbench/sp_solver.cpp create mode 100644 thirdparty/eigen/bench/spbench/spbench.dtd create mode 100644 thirdparty/eigen/bench/spbench/spbenchsolver.cpp create mode 100644 thirdparty/eigen/bench/spbench/spbenchsolver.h create mode 100644 thirdparty/eigen/bench/spbench/spbenchstyle.h create mode 100644 thirdparty/eigen/bench/spbench/test_sparseLU.cpp create mode 100644 thirdparty/eigen/bench/spmv.cpp create mode 100644 thirdparty/eigen/bench/tensors/README create 
mode 100644 thirdparty/eigen/bench/tensors/benchmark.h create mode 100644 thirdparty/eigen/bench/tensors/benchmark_main.cc create mode 100644 thirdparty/eigen/bench/tensors/contraction_benchmarks_cpu.cc create mode 100644 thirdparty/eigen/bench/tensors/tensor_benchmarks.h create mode 100644 thirdparty/eigen/bench/tensors/tensor_benchmarks_cpu.cc create mode 100644 thirdparty/eigen/bench/tensors/tensor_benchmarks_fp16_gpu.cu create mode 100644 thirdparty/eigen/bench/tensors/tensor_benchmarks_gpu.cu create mode 100644 thirdparty/eigen/bench/tensors/tensor_benchmarks_sycl.cc create mode 100644 thirdparty/eigen/bench/vdw_new.cpp create mode 100644 thirdparty/eigen/blas/BandTriangularSolver.h create mode 100644 thirdparty/eigen/blas/CMakeLists.txt create mode 100644 thirdparty/eigen/blas/GeneralRank1Update.h create mode 100644 thirdparty/eigen/blas/PackedSelfadjointProduct.h create mode 100644 thirdparty/eigen/blas/PackedTriangularMatrixVector.h create mode 100644 thirdparty/eigen/blas/PackedTriangularSolverVector.h create mode 100644 thirdparty/eigen/blas/README.txt create mode 100644 thirdparty/eigen/blas/Rank2Update.h create mode 100644 thirdparty/eigen/blas/common.h create mode 100644 thirdparty/eigen/blas/complex_double.cpp create mode 100644 thirdparty/eigen/blas/complex_single.cpp create mode 100644 thirdparty/eigen/blas/double.cpp create mode 100644 thirdparty/eigen/blas/f2c/chbmv.c create mode 100644 thirdparty/eigen/blas/f2c/chpmv.c create mode 100644 thirdparty/eigen/blas/f2c/complexdots.c create mode 100644 thirdparty/eigen/blas/f2c/ctbmv.c create mode 100644 thirdparty/eigen/blas/f2c/d_cnjg.c create mode 100644 thirdparty/eigen/blas/f2c/datatypes.h create mode 100644 thirdparty/eigen/blas/f2c/drotm.c create mode 100644 thirdparty/eigen/blas/f2c/drotmg.c create mode 100644 thirdparty/eigen/blas/f2c/dsbmv.c create mode 100644 thirdparty/eigen/blas/f2c/dspmv.c create mode 100644 thirdparty/eigen/blas/f2c/dtbmv.c create mode 100644 
thirdparty/eigen/blas/f2c/lsame.c create mode 100644 thirdparty/eigen/blas/f2c/r_cnjg.c create mode 100644 thirdparty/eigen/blas/f2c/srotm.c create mode 100644 thirdparty/eigen/blas/f2c/srotmg.c create mode 100644 thirdparty/eigen/blas/f2c/ssbmv.c create mode 100644 thirdparty/eigen/blas/f2c/sspmv.c create mode 100644 thirdparty/eigen/blas/f2c/stbmv.c create mode 100644 thirdparty/eigen/blas/f2c/zhbmv.c create mode 100644 thirdparty/eigen/blas/f2c/zhpmv.c create mode 100644 thirdparty/eigen/blas/f2c/ztbmv.c create mode 100644 thirdparty/eigen/blas/fortran/complexdots.f create mode 100644 thirdparty/eigen/blas/level1_cplx_impl.h create mode 100644 thirdparty/eigen/blas/level1_impl.h create mode 100644 thirdparty/eigen/blas/level1_real_impl.h create mode 100644 thirdparty/eigen/blas/level2_cplx_impl.h create mode 100644 thirdparty/eigen/blas/level2_impl.h create mode 100644 thirdparty/eigen/blas/level2_real_impl.h create mode 100644 thirdparty/eigen/blas/level3_impl.h create mode 100644 thirdparty/eigen/blas/single.cpp create mode 100644 thirdparty/eigen/blas/testing/CMakeLists.txt create mode 100644 thirdparty/eigen/blas/testing/cblat1.f create mode 100644 thirdparty/eigen/blas/testing/cblat2.dat create mode 100644 thirdparty/eigen/blas/testing/cblat2.f create mode 100644 thirdparty/eigen/blas/testing/cblat3.dat create mode 100644 thirdparty/eigen/blas/testing/cblat3.f create mode 100644 thirdparty/eigen/blas/testing/dblat1.f create mode 100644 thirdparty/eigen/blas/testing/dblat2.dat create mode 100644 thirdparty/eigen/blas/testing/dblat2.f create mode 100644 thirdparty/eigen/blas/testing/dblat3.dat create mode 100644 thirdparty/eigen/blas/testing/dblat3.f create mode 100755 thirdparty/eigen/blas/testing/runblastest.sh create mode 100644 thirdparty/eigen/blas/testing/sblat1.f create mode 100644 thirdparty/eigen/blas/testing/sblat2.dat create mode 100644 thirdparty/eigen/blas/testing/sblat2.f create mode 100644 thirdparty/eigen/blas/testing/sblat3.dat create mode 
100644 thirdparty/eigen/blas/testing/sblat3.f create mode 100644 thirdparty/eigen/blas/testing/zblat1.f create mode 100644 thirdparty/eigen/blas/testing/zblat2.dat create mode 100644 thirdparty/eigen/blas/testing/zblat2.f create mode 100644 thirdparty/eigen/blas/testing/zblat3.dat create mode 100644 thirdparty/eigen/blas/testing/zblat3.f create mode 100644 thirdparty/eigen/blas/xerbla.cpp create mode 100644 thirdparty/eigen/cmake/Eigen3Config.cmake.in create mode 100644 thirdparty/eigen/cmake/EigenConfigureTesting.cmake create mode 100644 thirdparty/eigen/cmake/EigenDetermineOSVersion.cmake create mode 100644 thirdparty/eigen/cmake/EigenDetermineVSServicePack.cmake create mode 100644 thirdparty/eigen/cmake/EigenTesting.cmake create mode 100644 thirdparty/eigen/cmake/EigenUninstall.cmake create mode 100644 thirdparty/eigen/cmake/FindAdolc.cmake create mode 100644 thirdparty/eigen/cmake/FindBLAS.cmake create mode 100644 thirdparty/eigen/cmake/FindCholmod.cmake create mode 100644 thirdparty/eigen/cmake/FindComputeCpp.cmake create mode 100644 thirdparty/eigen/cmake/FindEigen2.cmake create mode 100644 thirdparty/eigen/cmake/FindEigen3.cmake create mode 100644 thirdparty/eigen/cmake/FindFFTW.cmake create mode 100644 thirdparty/eigen/cmake/FindGLEW.cmake create mode 100644 thirdparty/eigen/cmake/FindGMP.cmake create mode 100644 thirdparty/eigen/cmake/FindGSL.cmake create mode 100644 thirdparty/eigen/cmake/FindGoogleHash.cmake create mode 100644 thirdparty/eigen/cmake/FindLAPACK.cmake create mode 100644 thirdparty/eigen/cmake/FindMPFR.cmake create mode 100644 thirdparty/eigen/cmake/FindMetis.cmake create mode 100644 thirdparty/eigen/cmake/FindPastix.cmake create mode 100644 thirdparty/eigen/cmake/FindSPQR.cmake create mode 100644 thirdparty/eigen/cmake/FindScotch.cmake create mode 100644 thirdparty/eigen/cmake/FindStandardMathLibrary.cmake create mode 100644 thirdparty/eigen/cmake/FindSuperLU.cmake create mode 100644 thirdparty/eigen/cmake/FindUmfpack.cmake create mode 
100644 thirdparty/eigen/cmake/RegexUtils.cmake create mode 100644 thirdparty/eigen/cmake/UseEigen3.cmake create mode 100644 thirdparty/eigen/cmake/language_support.cmake create mode 100644 thirdparty/eigen/debug/gdb/__init__.py create mode 100644 thirdparty/eigen/debug/gdb/printers.py create mode 100644 thirdparty/eigen/debug/msvc/eigen.natvis create mode 100644 thirdparty/eigen/debug/msvc/eigen_autoexp_part.dat create mode 100644 thirdparty/eigen/demos/CMakeLists.txt create mode 100644 thirdparty/eigen/demos/mandelbrot/CMakeLists.txt create mode 100644 thirdparty/eigen/demos/mandelbrot/README create mode 100644 thirdparty/eigen/demos/mandelbrot/mandelbrot.cpp create mode 100644 thirdparty/eigen/demos/mandelbrot/mandelbrot.h create mode 100644 thirdparty/eigen/demos/mix_eigen_and_c/README create mode 100644 thirdparty/eigen/demos/mix_eigen_and_c/binary_library.cpp create mode 100644 thirdparty/eigen/demos/mix_eigen_and_c/binary_library.h create mode 100644 thirdparty/eigen/demos/mix_eigen_and_c/example.c create mode 100644 thirdparty/eigen/demos/opengl/CMakeLists.txt create mode 100644 thirdparty/eigen/demos/opengl/README create mode 100644 thirdparty/eigen/demos/opengl/camera.cpp create mode 100644 thirdparty/eigen/demos/opengl/camera.h create mode 100644 thirdparty/eigen/demos/opengl/gpuhelper.cpp create mode 100644 thirdparty/eigen/demos/opengl/gpuhelper.h create mode 100644 thirdparty/eigen/demos/opengl/icosphere.cpp create mode 100644 thirdparty/eigen/demos/opengl/icosphere.h create mode 100644 thirdparty/eigen/demos/opengl/quaternion_demo.cpp create mode 100644 thirdparty/eigen/demos/opengl/quaternion_demo.h create mode 100644 thirdparty/eigen/demos/opengl/trackball.cpp create mode 100644 thirdparty/eigen/demos/opengl/trackball.h create mode 100644 thirdparty/eigen/doc/A05_PortingFrom2To3.dox create mode 100644 thirdparty/eigen/doc/AsciiQuickReference.txt create mode 100644 thirdparty/eigen/doc/B01_Experimental.dox create mode 100644 
thirdparty/eigen/doc/CMakeLists.txt create mode 100644 thirdparty/eigen/doc/ClassHierarchy.dox create mode 100644 thirdparty/eigen/doc/CoeffwiseMathFunctionsTable.dox create mode 100644 thirdparty/eigen/doc/CustomizingEigen_CustomScalar.dox create mode 100644 thirdparty/eigen/doc/CustomizingEigen_InheritingMatrix.dox create mode 100644 thirdparty/eigen/doc/CustomizingEigen_NullaryExpr.dox create mode 100644 thirdparty/eigen/doc/CustomizingEigen_Plugins.dox create mode 100644 thirdparty/eigen/doc/DenseDecompositionBenchmark.dox create mode 100644 thirdparty/eigen/doc/Doxyfile.in create mode 100644 thirdparty/eigen/doc/Eigen_Silly_Professor_64x64.png create mode 100644 thirdparty/eigen/doc/FixedSizeVectorizable.dox create mode 100644 thirdparty/eigen/doc/FunctionsTakingEigenTypes.dox create mode 100644 thirdparty/eigen/doc/HiPerformance.dox create mode 100644 thirdparty/eigen/doc/InplaceDecomposition.dox create mode 100644 thirdparty/eigen/doc/InsideEigenExample.dox create mode 100644 thirdparty/eigen/doc/LeastSquares.dox create mode 100644 thirdparty/eigen/doc/Manual.dox create mode 100644 thirdparty/eigen/doc/MatrixfreeSolverExample.dox create mode 100644 thirdparty/eigen/doc/NewExpressionType.dox create mode 100644 thirdparty/eigen/doc/Overview.dox create mode 100644 thirdparty/eigen/doc/PassingByValue.dox create mode 100644 thirdparty/eigen/doc/Pitfalls.dox create mode 100644 thirdparty/eigen/doc/PreprocessorDirectives.dox create mode 100644 thirdparty/eigen/doc/QuickReference.dox create mode 100644 thirdparty/eigen/doc/QuickStartGuide.dox create mode 100644 thirdparty/eigen/doc/SparseLinearSystems.dox create mode 100644 thirdparty/eigen/doc/SparseQuickReference.dox create mode 100644 thirdparty/eigen/doc/StlContainers.dox create mode 100644 thirdparty/eigen/doc/StorageOrders.dox create mode 100644 thirdparty/eigen/doc/StructHavingEigenMembers.dox create mode 100644 thirdparty/eigen/doc/TemplateKeyword.dox create mode 100644 thirdparty/eigen/doc/TopicAliasing.dox 
create mode 100644 thirdparty/eigen/doc/TopicAssertions.dox create mode 100644 thirdparty/eigen/doc/TopicEigenExpressionTemplates.dox create mode 100644 thirdparty/eigen/doc/TopicLazyEvaluation.dox create mode 100644 thirdparty/eigen/doc/TopicLinearAlgebraDecompositions.dox create mode 100644 thirdparty/eigen/doc/TopicMultithreading.dox create mode 100644 thirdparty/eigen/doc/TopicResizing.dox create mode 100644 thirdparty/eigen/doc/TopicScalarTypes.dox create mode 100644 thirdparty/eigen/doc/TopicVectorization.dox create mode 100644 thirdparty/eigen/doc/TutorialAdvancedInitialization.dox create mode 100644 thirdparty/eigen/doc/TutorialArrayClass.dox create mode 100644 thirdparty/eigen/doc/TutorialBlockOperations.dox create mode 100644 thirdparty/eigen/doc/TutorialGeometry.dox create mode 100644 thirdparty/eigen/doc/TutorialLinearAlgebra.dox create mode 100644 thirdparty/eigen/doc/TutorialMapClass.dox create mode 100644 thirdparty/eigen/doc/TutorialMatrixArithmetic.dox create mode 100644 thirdparty/eigen/doc/TutorialMatrixClass.dox create mode 100644 thirdparty/eigen/doc/TutorialReductionsVisitorsBroadcasting.dox create mode 100644 thirdparty/eigen/doc/TutorialReshapeSlicing.dox create mode 100644 thirdparty/eigen/doc/TutorialSparse.dox create mode 100644 thirdparty/eigen/doc/TutorialSparse_example_details.dox create mode 100644 thirdparty/eigen/doc/UnalignedArrayAssert.dox create mode 100644 thirdparty/eigen/doc/UsingBlasLapackBackends.dox create mode 100644 thirdparty/eigen/doc/UsingIntelMKL.dox create mode 100644 thirdparty/eigen/doc/UsingNVCC.dox create mode 100644 thirdparty/eigen/doc/WrongStackAlignment.dox create mode 100644 thirdparty/eigen/doc/eigen_navtree_hacks.js create mode 100644 thirdparty/eigen/doc/eigendoxy.css create mode 100644 thirdparty/eigen/doc/eigendoxy_footer.html.in create mode 100644 thirdparty/eigen/doc/eigendoxy_header.html.in create mode 100644 thirdparty/eigen/doc/eigendoxy_layout.xml.in create mode 100644 
thirdparty/eigen/doc/eigendoxy_tabs.css create mode 100644 thirdparty/eigen/doc/examples/.krazy create mode 100644 thirdparty/eigen/doc/examples/CMakeLists.txt create mode 100644 thirdparty/eigen/doc/examples/CustomizingEigen_Inheritance.cpp create mode 100644 thirdparty/eigen/doc/examples/Cwise_erf.cpp create mode 100644 thirdparty/eigen/doc/examples/Cwise_erfc.cpp create mode 100644 thirdparty/eigen/doc/examples/Cwise_lgamma.cpp create mode 100644 thirdparty/eigen/doc/examples/DenseBase_middleCols_int.cpp create mode 100644 thirdparty/eigen/doc/examples/DenseBase_middleRows_int.cpp create mode 100644 thirdparty/eigen/doc/examples/DenseBase_template_int_middleCols.cpp create mode 100644 thirdparty/eigen/doc/examples/DenseBase_template_int_middleRows.cpp create mode 100644 thirdparty/eigen/doc/examples/QuickStart_example.cpp create mode 100644 thirdparty/eigen/doc/examples/QuickStart_example2_dynamic.cpp create mode 100644 thirdparty/eigen/doc/examples/QuickStart_example2_fixed.cpp create mode 100644 thirdparty/eigen/doc/examples/TemplateKeyword_flexible.cpp create mode 100644 thirdparty/eigen/doc/examples/TemplateKeyword_simple.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialInplaceLU.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp create mode 100644 thirdparty/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp create mode 100644 
thirdparty/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_BlockOperations_colrow.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_BlockOperations_corner.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_BlockOperations_print_block.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_BlockOperations_vector.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_PartialLU_solve.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple_rowwise.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_colwise.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_rowwise.cpp create mode 100644 
thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp create mode 100644 thirdparty/eigen/doc/examples/Tutorial_simple_example_fixed_size.cpp create mode 100644 thirdparty/eigen/doc/examples/class_Block.cpp create mode 100644 thirdparty/eigen/doc/examples/class_CwiseBinaryOp.cpp create mode 100644 thirdparty/eigen/doc/examples/class_CwiseUnaryOp.cpp create mode 100644 thirdparty/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp create mode 100644 thirdparty/eigen/doc/examples/class_FixedBlock.cpp create mode 100644 thirdparty/eigen/doc/examples/class_FixedVectorBlock.cpp create mode 100644 thirdparty/eigen/doc/examples/class_VectorBlock.cpp create mode 100644 thirdparty/eigen/doc/examples/function_taking_eigenbase.cpp create mode 100644 thirdparty/eigen/doc/examples/function_taking_ref.cpp create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp.entry create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp.evaluator create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp.expression create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp.main create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp.preamble create mode 100644 thirdparty/eigen/doc/examples/make_circulant.cpp.traits create mode 100644 thirdparty/eigen/doc/examples/make_circulant2.cpp create mode 100644 thirdparty/eigen/doc/examples/matrixfree_cg.cpp create mode 100644 thirdparty/eigen/doc/examples/nullary_indexing.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_arithmetic_add_sub.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_arithmetic_dot_cross.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_arithmetic_redux_basic.cpp create mode 100644 
thirdparty/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_matrix_resize.cpp create mode 100644 thirdparty/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp create mode 100644 thirdparty/eigen/doc/ftv2node.png create mode 100644 thirdparty/eigen/doc/ftv2pnode.png create mode 100644 thirdparty/eigen/doc/snippets/.krazy create mode 100644 thirdparty/eigen/doc/snippets/AngleAxis_mimic_euler.cpp create mode 100644 thirdparty/eigen/doc/snippets/BiCGSTAB_simple.cpp create mode 100644 thirdparty/eigen/doc/snippets/BiCGSTAB_step_by_step.cpp create mode 100644 thirdparty/eigen/doc/snippets/CMakeLists.txt create mode 100644 thirdparty/eigen/doc/snippets/ColPivHouseholderQR_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/ComplexEigenSolver_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvalues.cpp create mode 100644 thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvectors.cpp create mode 100644 thirdparty/eigen/doc/snippets/ComplexSchur_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/ComplexSchur_matrixT.cpp create mode 100644 thirdparty/eigen/doc/snippets/ComplexSchur_matrixU.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_abs.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_abs2.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_acos.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_arg.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_array_power_array.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_asin.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_atan.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_boolean_and.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_boolean_not.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_boolean_or.cpp create mode 100644 
thirdparty/eigen/doc/snippets/Cwise_ceil.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_cos.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_cosh.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_cube.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_equal_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_exp.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_floor.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_greater.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_greater_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_inverse.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_isFinite.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_isInf.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_isNaN.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_less.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_less_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_log.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_log10.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_max.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_min.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_minus.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_minus_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_not_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_plus.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_plus_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_pow.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_product.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_quotient.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_round.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_scalar_power_array.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_sign.cpp create mode 100644 
thirdparty/eigen/doc/snippets/Cwise_sin.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_sinh.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_slash_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_sqrt.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_square.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_tan.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_tanh.cpp create mode 100644 thirdparty/eigen/doc/snippets/Cwise_times_equal.cpp create mode 100644 thirdparty/eigen/doc/snippets/DenseBase_LinSpaced.cpp create mode 100644 thirdparty/eigen/doc/snippets/DenseBase_LinSpacedInt.cpp create mode 100644 thirdparty/eigen/doc/snippets/DenseBase_LinSpaced_seq.cpp create mode 100644 thirdparty/eigen/doc/snippets/DenseBase_setLinSpaced.cpp create mode 100644 thirdparty/eigen/doc/snippets/DirectionWise_hnormalized.cpp create mode 100644 thirdparty/eigen/doc/snippets/DirectionWise_replicate.cpp create mode 100644 thirdparty/eigen/doc/snippets/DirectionWise_replicate_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/EigenSolver_EigenSolver_MatrixType.cpp create mode 100644 thirdparty/eigen/doc/snippets/EigenSolver_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/EigenSolver_eigenvalues.cpp create mode 100644 thirdparty/eigen/doc/snippets/EigenSolver_eigenvectors.cpp create mode 100644 thirdparty/eigen/doc/snippets/EigenSolver_pseudoEigenvectors.cpp create mode 100644 thirdparty/eigen/doc/snippets/FullPivHouseholderQR_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/FullPivLU_image.cpp create mode 100644 thirdparty/eigen/doc/snippets/FullPivLU_kernel.cpp create mode 100644 thirdparty/eigen/doc/snippets/FullPivLU_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/GeneralizedEigenSolver.cpp create mode 100644 thirdparty/eigen/doc/snippets/HessenbergDecomposition_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/HessenbergDecomposition_matrixH.cpp create mode 100644 
thirdparty/eigen/doc/snippets/HessenbergDecomposition_packedMatrix.cpp create mode 100644 thirdparty/eigen/doc/snippets/HouseholderQR_householderQ.cpp create mode 100644 thirdparty/eigen/doc/snippets/HouseholderQR_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/HouseholderSequence_HouseholderSequence.cpp create mode 100644 thirdparty/eigen/doc/snippets/IOFormat.cpp create mode 100644 thirdparty/eigen/doc/snippets/JacobiSVD_basic.cpp create mode 100644 thirdparty/eigen/doc/snippets/Jacobi_makeGivens.cpp create mode 100644 thirdparty/eigen/doc/snippets/Jacobi_makeJacobi.cpp create mode 100644 thirdparty/eigen/doc/snippets/LLT_example.cpp create mode 100644 thirdparty/eigen/doc/snippets/LLT_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/LeastSquaresNormalEquations.cpp create mode 100644 thirdparty/eigen/doc/snippets/LeastSquaresQR.cpp create mode 100644 thirdparty/eigen/doc/snippets/Map_general_stride.cpp create mode 100644 thirdparty/eigen/doc/snippets/Map_inner_stride.cpp create mode 100644 thirdparty/eigen/doc/snippets/Map_outer_stride.cpp create mode 100644 thirdparty/eigen/doc/snippets/Map_placement_new.cpp create mode 100644 thirdparty/eigen/doc/snippets/Map_simple.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_adjoint.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_all.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheLeft.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheRight.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_array.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_array_const.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_asDiagonal.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_block_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_block_int_int_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_bottomLeftCorner_int_int.cpp create mode 
100644 thirdparty/eigen/doc/snippets/MatrixBase_bottomRightCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_bottomRows_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cast.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_col.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_colwise.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_computeInverseAndDetWithCheck.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_computeInverseWithCheck.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs2.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseEqual.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseInverse.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseMax.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseMin.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseNotEqual.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseProduct.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseQuotient.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseSign.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_cwiseSqrt.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_diagonal.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_diagonal_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_diagonal_template_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_eigenvalues.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_end_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_eval.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_fixedBlock_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_hnormalized.cpp create mode 
100644 thirdparty/eigen/doc/snippets/MatrixBase_homogeneous.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_identity.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_identity_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_inverse.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_isDiagonal.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_isIdentity.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_isOnes.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_isOrthogonal.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_isUnitary.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_isZero.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_leftCols_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_noalias.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_ones.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_ones_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_ones_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_operatorNorm.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_prod.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_random.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_random_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_random_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_replicate.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_replicate_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_reverse.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_rightCols_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_row.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_rowwise.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_segment_int_int.cpp 
create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_select.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_set.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_setIdentity.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_setOnes.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_setRandom.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_setZero.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_start_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_bottomRows.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_end.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_block_int_int_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_leftCols.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_rightCols.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_segment.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_template_int_start.cpp create mode 100644 
thirdparty/eigen/doc/snippets/MatrixBase_template_int_topRows.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_topLeftCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_topRightCorner_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_topRows_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_transpose.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_triangularView.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_zero.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_zero_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/MatrixBase_zero_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_resize_NoChange_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_resize_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_resize_int_NoChange.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_resize_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setConstant_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setConstant_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setIdentity_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setOnes_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setOnes_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setRandom_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setRandom_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setZero_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/Matrix_setZero_int_int.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialPivLU_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialRedux_count.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialRedux_maxCoeff.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialRedux_minCoeff.cpp create mode 100644 
thirdparty/eigen/doc/snippets/PartialRedux_norm.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialRedux_prod.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialRedux_squaredNorm.cpp create mode 100644 thirdparty/eigen/doc/snippets/PartialRedux_sum.cpp create mode 100644 thirdparty/eigen/doc/snippets/RealQZ_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/RealSchur_RealSchur_MatrixType.cpp create mode 100644 thirdparty/eigen/doc/snippets/RealSchur_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType2.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvalues.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvectors.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorInverseSqrt.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorSqrt.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointView_eigenvalues.cpp create mode 100644 thirdparty/eigen/doc/snippets/SelfAdjointView_operatorNorm.cpp create mode 100644 thirdparty/eigen/doc/snippets/SparseMatrix_coeffs.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_block.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_block_correct.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_cwise.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_mult1.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_mult2.cpp create mode 100644 
thirdparty/eigen/doc/snippets/TopicAliasing_mult3.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_mult4.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicAliasing_mult5.cpp create mode 100644 thirdparty/eigen/doc/snippets/TopicStorageOrders_example.cpp create mode 100644 thirdparty/eigen/doc/snippets/Triangular_solve.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tridiagonalization_Tridiagonalization_MatrixType.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tridiagonalization_compute.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tridiagonalization_decomposeInPlace.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tridiagonalization_diagonal.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tridiagonalization_householderCoefficients.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tridiagonalization_packedMatrix.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Block.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_CommaTemporary.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_LinSpaced.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_ThreeWays.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Zero.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_Map_rowmajor.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_Map_using.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Mat.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Vec.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_SlicingCol.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_SlicingVec.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_commainit_01.cpp create mode 100644 
thirdparty/eigen/doc/snippets/Tutorial_commainit_01b.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_commainit_02.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_solve_matrix_inverse.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_solve_multiple_rhs.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_solve_reuse_decomposition.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_solve_singular.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_solve_triangular.cpp create mode 100644 thirdparty/eigen/doc/snippets/Tutorial_solve_triangular_inplace.cpp create mode 100644 thirdparty/eigen/doc/snippets/VectorwiseOp_homogeneous.cpp create mode 100644 thirdparty/eigen/doc/snippets/Vectorwise_reverse.cpp create mode 100644 thirdparty/eigen/doc/snippets/class_FullPivLU.cpp create mode 100644 thirdparty/eigen/doc/snippets/compile_snippet.cpp.in create mode 100644 thirdparty/eigen/doc/snippets/tut_arithmetic_redux_minmax.cpp create mode 100644 thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_aliasing.cpp create mode 100644 thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_conjugate.cpp create mode 100644 thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_inplace.cpp create mode 100644 thirdparty/eigen/doc/snippets/tut_matrix_assignment_resizing.cpp create mode 100644 thirdparty/eigen/doc/special_examples/CMakeLists.txt create mode 100644 thirdparty/eigen/doc/special_examples/Tutorial_sparse_example.cpp create mode 100644 thirdparty/eigen/doc/special_examples/Tutorial_sparse_example_details.cpp create mode 100644 thirdparty/eigen/doc/special_examples/random_cpp11.cpp create mode 100644 thirdparty/eigen/doc/tutorial.cpp create mode 100644 thirdparty/eigen/eigen3.pc.in create mode 100644 thirdparty/eigen/failtest/CMakeLists.txt create mode 100644 thirdparty/eigen/failtest/bdcsvd_int.cpp create mode 100644 thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_0.cpp create mode 100644 
thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_1.cpp create mode 100644 thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_2.cpp create mode 100644 thirdparty/eigen/failtest/block_on_const_type_actually_const_0.cpp create mode 100644 thirdparty/eigen/failtest/block_on_const_type_actually_const_1.cpp create mode 100644 thirdparty/eigen/failtest/colpivqr_int.cpp create mode 100644 thirdparty/eigen/failtest/const_qualified_block_method_retval_0.cpp create mode 100644 thirdparty/eigen/failtest/const_qualified_block_method_retval_1.cpp create mode 100644 thirdparty/eigen/failtest/const_qualified_diagonal_method_retval.cpp create mode 100644 thirdparty/eigen/failtest/const_qualified_transpose_method_retval.cpp create mode 100644 thirdparty/eigen/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp create mode 100644 thirdparty/eigen/failtest/cwiseunaryview_on_const_type_actually_const.cpp create mode 100644 thirdparty/eigen/failtest/diagonal_nonconst_ctor_on_const_xpr.cpp create mode 100644 thirdparty/eigen/failtest/diagonal_on_const_type_actually_const.cpp create mode 100644 thirdparty/eigen/failtest/eigensolver_cplx.cpp create mode 100644 thirdparty/eigen/failtest/eigensolver_int.cpp create mode 100644 thirdparty/eigen/failtest/failtest_sanity_check.cpp create mode 100644 thirdparty/eigen/failtest/fullpivlu_int.cpp create mode 100644 thirdparty/eigen/failtest/fullpivqr_int.cpp create mode 100644 thirdparty/eigen/failtest/jacobisvd_int.cpp create mode 100644 thirdparty/eigen/failtest/ldlt_int.cpp create mode 100644 thirdparty/eigen/failtest/llt_int.cpp create mode 100644 thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_0.cpp create mode 100644 thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_1.cpp create mode 100644 thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_2.cpp create mode 100644 thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_3.cpp create mode 100644 
thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_4.cpp create mode 100644 thirdparty/eigen/failtest/map_on_const_type_actually_const_0.cpp create mode 100644 thirdparty/eigen/failtest/map_on_const_type_actually_const_1.cpp create mode 100644 thirdparty/eigen/failtest/partialpivlu_int.cpp create mode 100644 thirdparty/eigen/failtest/qr_int.cpp create mode 100644 thirdparty/eigen/failtest/ref_1.cpp create mode 100644 thirdparty/eigen/failtest/ref_2.cpp create mode 100644 thirdparty/eigen/failtest/ref_3.cpp create mode 100644 thirdparty/eigen/failtest/ref_4.cpp create mode 100644 thirdparty/eigen/failtest/ref_5.cpp create mode 100644 thirdparty/eigen/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp create mode 100644 thirdparty/eigen/failtest/selfadjointview_on_const_type_actually_const.cpp create mode 100644 thirdparty/eigen/failtest/sparse_ref_1.cpp create mode 100644 thirdparty/eigen/failtest/sparse_ref_2.cpp create mode 100644 thirdparty/eigen/failtest/sparse_ref_3.cpp create mode 100644 thirdparty/eigen/failtest/sparse_ref_4.cpp create mode 100644 thirdparty/eigen/failtest/sparse_ref_5.cpp create mode 100644 thirdparty/eigen/failtest/sparse_storage_mismatch.cpp create mode 100644 thirdparty/eigen/failtest/swap_1.cpp create mode 100644 thirdparty/eigen/failtest/swap_2.cpp create mode 100644 thirdparty/eigen/failtest/ternary_1.cpp create mode 100644 thirdparty/eigen/failtest/ternary_2.cpp create mode 100644 thirdparty/eigen/failtest/transpose_nonconst_ctor_on_const_xpr.cpp create mode 100644 thirdparty/eigen/failtest/transpose_on_const_type_actually_const.cpp create mode 100644 thirdparty/eigen/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp create mode 100644 thirdparty/eigen/failtest/triangularview_on_const_type_actually_const.cpp create mode 100644 thirdparty/eigen/lapack/CMakeLists.txt create mode 100644 thirdparty/eigen/lapack/cholesky.cpp create mode 100644 thirdparty/eigen/lapack/clacgv.f create mode 100644 
thirdparty/eigen/lapack/cladiv.f create mode 100644 thirdparty/eigen/lapack/clarf.f create mode 100644 thirdparty/eigen/lapack/clarfb.f create mode 100644 thirdparty/eigen/lapack/clarfg.f create mode 100644 thirdparty/eigen/lapack/clarft.f create mode 100644 thirdparty/eigen/lapack/complex_double.cpp create mode 100644 thirdparty/eigen/lapack/complex_single.cpp create mode 100644 thirdparty/eigen/lapack/dladiv.f create mode 100644 thirdparty/eigen/lapack/dlamch.f create mode 100644 thirdparty/eigen/lapack/dlapy2.f create mode 100644 thirdparty/eigen/lapack/dlapy3.f create mode 100644 thirdparty/eigen/lapack/dlarf.f create mode 100644 thirdparty/eigen/lapack/dlarfb.f create mode 100644 thirdparty/eigen/lapack/dlarfg.f create mode 100644 thirdparty/eigen/lapack/dlarft.f create mode 100644 thirdparty/eigen/lapack/double.cpp create mode 100644 thirdparty/eigen/lapack/dsecnd_NONE.f create mode 100644 thirdparty/eigen/lapack/eigenvalues.cpp create mode 100644 thirdparty/eigen/lapack/ilaclc.f create mode 100644 thirdparty/eigen/lapack/ilaclr.f create mode 100644 thirdparty/eigen/lapack/iladlc.f create mode 100644 thirdparty/eigen/lapack/iladlr.f create mode 100644 thirdparty/eigen/lapack/ilaslc.f create mode 100644 thirdparty/eigen/lapack/ilaslr.f create mode 100644 thirdparty/eigen/lapack/ilazlc.f create mode 100644 thirdparty/eigen/lapack/ilazlr.f create mode 100644 thirdparty/eigen/lapack/lapack_common.h create mode 100644 thirdparty/eigen/lapack/lu.cpp create mode 100644 thirdparty/eigen/lapack/second_NONE.f create mode 100644 thirdparty/eigen/lapack/single.cpp create mode 100644 thirdparty/eigen/lapack/sladiv.f create mode 100644 thirdparty/eigen/lapack/slamch.f create mode 100644 thirdparty/eigen/lapack/slapy2.f create mode 100644 thirdparty/eigen/lapack/slapy3.f create mode 100644 thirdparty/eigen/lapack/slarf.f create mode 100644 thirdparty/eigen/lapack/slarfb.f create mode 100644 thirdparty/eigen/lapack/slarfg.f create mode 100644 thirdparty/eigen/lapack/slarft.f 
create mode 100644 thirdparty/eigen/lapack/svd.cpp create mode 100644 thirdparty/eigen/lapack/zlacgv.f create mode 100644 thirdparty/eigen/lapack/zladiv.f create mode 100644 thirdparty/eigen/lapack/zlarf.f create mode 100644 thirdparty/eigen/lapack/zlarfb.f create mode 100644 thirdparty/eigen/lapack/zlarfg.f create mode 100644 thirdparty/eigen/lapack/zlarft.f create mode 100644 thirdparty/eigen/scripts/CMakeLists.txt create mode 100755 thirdparty/eigen/scripts/buildtests.in create mode 100644 thirdparty/eigen/scripts/cdashtesting.cmake.in create mode 100755 thirdparty/eigen/scripts/check.in create mode 100755 thirdparty/eigen/scripts/debug.in create mode 100644 thirdparty/eigen/scripts/eigen_gen_credits.cpp create mode 100644 thirdparty/eigen/scripts/eigen_gen_docs create mode 100755 thirdparty/eigen/scripts/release.in create mode 100644 thirdparty/eigen/scripts/relicense.py create mode 100644 thirdparty/eigen/signature_of_eigen3_matrix_library create mode 100644 thirdparty/eigen/test/CMakeLists.txt create mode 100644 thirdparty/eigen/test/adjoint.cpp create mode 100644 thirdparty/eigen/test/array.cpp create mode 100644 thirdparty/eigen/test/array_for_matrix.cpp create mode 100644 thirdparty/eigen/test/array_of_string.cpp create mode 100644 thirdparty/eigen/test/array_replicate.cpp create mode 100644 thirdparty/eigen/test/array_reverse.cpp create mode 100644 thirdparty/eigen/test/bandmatrix.cpp create mode 100644 thirdparty/eigen/test/basicstuff.cpp create mode 100644 thirdparty/eigen/test/bdcsvd.cpp create mode 100644 thirdparty/eigen/test/bicgstab.cpp create mode 100644 thirdparty/eigen/test/block.cpp create mode 100644 thirdparty/eigen/test/boostmultiprec.cpp create mode 100644 thirdparty/eigen/test/bug1213.cpp create mode 100644 thirdparty/eigen/test/bug1213.h create mode 100644 thirdparty/eigen/test/bug1213_main.cpp create mode 100644 thirdparty/eigen/test/cholesky.cpp create mode 100644 thirdparty/eigen/test/cholmod_support.cpp create mode 100644 
thirdparty/eigen/test/commainitializer.cpp create mode 100644 thirdparty/eigen/test/conjugate_gradient.cpp create mode 100644 thirdparty/eigen/test/conservative_resize.cpp create mode 100644 thirdparty/eigen/test/corners.cpp create mode 100644 thirdparty/eigen/test/ctorleak.cpp create mode 100644 thirdparty/eigen/test/cuda_basic.cu create mode 100644 thirdparty/eigen/test/cuda_common.h create mode 100644 thirdparty/eigen/test/denseLM.cpp create mode 100644 thirdparty/eigen/test/dense_storage.cpp create mode 100644 thirdparty/eigen/test/determinant.cpp create mode 100644 thirdparty/eigen/test/diagonal.cpp create mode 100644 thirdparty/eigen/test/diagonalmatrices.cpp create mode 100644 thirdparty/eigen/test/dontalign.cpp create mode 100644 thirdparty/eigen/test/dynalloc.cpp create mode 100644 thirdparty/eigen/test/eigen2support.cpp create mode 100644 thirdparty/eigen/test/eigensolver_complex.cpp create mode 100644 thirdparty/eigen/test/eigensolver_generalized_real.cpp create mode 100644 thirdparty/eigen/test/eigensolver_generic.cpp create mode 100644 thirdparty/eigen/test/eigensolver_selfadjoint.cpp create mode 100644 thirdparty/eigen/test/evaluator_common.h create mode 100644 thirdparty/eigen/test/evaluators.cpp create mode 100644 thirdparty/eigen/test/exceptions.cpp create mode 100644 thirdparty/eigen/test/fastmath.cpp create mode 100644 thirdparty/eigen/test/first_aligned.cpp create mode 100644 thirdparty/eigen/test/geo_alignedbox.cpp create mode 100644 thirdparty/eigen/test/geo_eulerangles.cpp create mode 100644 thirdparty/eigen/test/geo_homogeneous.cpp create mode 100644 thirdparty/eigen/test/geo_hyperplane.cpp create mode 100644 thirdparty/eigen/test/geo_orthomethods.cpp create mode 100644 thirdparty/eigen/test/geo_parametrizedline.cpp create mode 100644 thirdparty/eigen/test/geo_quaternion.cpp create mode 100755 thirdparty/eigen/test/geo_transformations.cpp create mode 100644 thirdparty/eigen/test/half_float.cpp create mode 100644 
thirdparty/eigen/test/hessenberg.cpp create mode 100644 thirdparty/eigen/test/householder.cpp create mode 100644 thirdparty/eigen/test/incomplete_cholesky.cpp create mode 100644 thirdparty/eigen/test/inplace_decomposition.cpp create mode 100644 thirdparty/eigen/test/integer_types.cpp create mode 100644 thirdparty/eigen/test/inverse.cpp create mode 100644 thirdparty/eigen/test/is_same_dense.cpp create mode 100644 thirdparty/eigen/test/jacobi.cpp create mode 100644 thirdparty/eigen/test/jacobisvd.cpp create mode 100644 thirdparty/eigen/test/linearstructure.cpp create mode 100644 thirdparty/eigen/test/lscg.cpp create mode 100644 thirdparty/eigen/test/lu.cpp create mode 100644 thirdparty/eigen/test/main.h create mode 100644 thirdparty/eigen/test/mapped_matrix.cpp create mode 100644 thirdparty/eigen/test/mapstaticmethods.cpp create mode 100644 thirdparty/eigen/test/mapstride.cpp create mode 100644 thirdparty/eigen/test/meta.cpp create mode 100644 thirdparty/eigen/test/metis_support.cpp create mode 100644 thirdparty/eigen/test/miscmatrices.cpp create mode 100644 thirdparty/eigen/test/mixingtypes.cpp create mode 100644 thirdparty/eigen/test/mpl2only.cpp create mode 100644 thirdparty/eigen/test/nesting_ops.cpp create mode 100644 thirdparty/eigen/test/nomalloc.cpp create mode 100644 thirdparty/eigen/test/nullary.cpp create mode 100644 thirdparty/eigen/test/packetmath.cpp create mode 100644 thirdparty/eigen/test/pardiso_support.cpp create mode 100644 thirdparty/eigen/test/pastix_support.cpp create mode 100644 thirdparty/eigen/test/permutationmatrices.cpp create mode 100644 thirdparty/eigen/test/prec_inverse_4x4.cpp create mode 100644 thirdparty/eigen/test/product.h create mode 100644 thirdparty/eigen/test/product_extra.cpp create mode 100644 thirdparty/eigen/test/product_large.cpp create mode 100644 thirdparty/eigen/test/product_mmtr.cpp create mode 100644 thirdparty/eigen/test/product_notemporary.cpp create mode 100644 thirdparty/eigen/test/product_selfadjoint.cpp create 
mode 100644 thirdparty/eigen/test/product_small.cpp create mode 100644 thirdparty/eigen/test/product_symm.cpp create mode 100644 thirdparty/eigen/test/product_syrk.cpp create mode 100644 thirdparty/eigen/test/product_trmm.cpp create mode 100644 thirdparty/eigen/test/product_trmv.cpp create mode 100644 thirdparty/eigen/test/product_trsolve.cpp create mode 100644 thirdparty/eigen/test/qr.cpp create mode 100644 thirdparty/eigen/test/qr_colpivoting.cpp create mode 100644 thirdparty/eigen/test/qr_fullpivoting.cpp create mode 100644 thirdparty/eigen/test/qtvector.cpp create mode 100644 thirdparty/eigen/test/rand.cpp create mode 100644 thirdparty/eigen/test/real_qz.cpp create mode 100644 thirdparty/eigen/test/redux.cpp create mode 100644 thirdparty/eigen/test/ref.cpp create mode 100644 thirdparty/eigen/test/resize.cpp create mode 100644 thirdparty/eigen/test/rvalue_types.cpp create mode 100644 thirdparty/eigen/test/schur_complex.cpp create mode 100644 thirdparty/eigen/test/schur_real.cpp create mode 100644 thirdparty/eigen/test/selfadjoint.cpp create mode 100644 thirdparty/eigen/test/simplicial_cholesky.cpp create mode 100644 thirdparty/eigen/test/sizeof.cpp create mode 100644 thirdparty/eigen/test/sizeoverflow.cpp create mode 100644 thirdparty/eigen/test/smallvectors.cpp create mode 100644 thirdparty/eigen/test/sparse.h create mode 100644 thirdparty/eigen/test/sparseLM.cpp create mode 100644 thirdparty/eigen/test/sparse_basic.cpp create mode 100644 thirdparty/eigen/test/sparse_block.cpp create mode 100644 thirdparty/eigen/test/sparse_permutations.cpp create mode 100644 thirdparty/eigen/test/sparse_product.cpp create mode 100644 thirdparty/eigen/test/sparse_ref.cpp create mode 100644 thirdparty/eigen/test/sparse_solver.h create mode 100644 thirdparty/eigen/test/sparse_solvers.cpp create mode 100644 thirdparty/eigen/test/sparse_vector.cpp create mode 100644 thirdparty/eigen/test/sparselu.cpp create mode 100644 thirdparty/eigen/test/sparseqr.cpp create mode 100644 
thirdparty/eigen/test/special_numbers.cpp create mode 100644 thirdparty/eigen/test/spqr_support.cpp create mode 100644 thirdparty/eigen/test/stable_norm.cpp create mode 100644 thirdparty/eigen/test/stddeque.cpp create mode 100644 thirdparty/eigen/test/stddeque_overload.cpp create mode 100644 thirdparty/eigen/test/stdlist.cpp create mode 100644 thirdparty/eigen/test/stdlist_overload.cpp create mode 100644 thirdparty/eigen/test/stdvector.cpp create mode 100644 thirdparty/eigen/test/stdvector_overload.cpp create mode 100644 thirdparty/eigen/test/superlu_support.cpp create mode 100644 thirdparty/eigen/test/svd_common.h create mode 100644 thirdparty/eigen/test/svd_fill.h create mode 100644 thirdparty/eigen/test/swap.cpp create mode 100644 thirdparty/eigen/test/triangular.cpp create mode 100644 thirdparty/eigen/test/umeyama.cpp create mode 100644 thirdparty/eigen/test/umfpack_support.cpp create mode 100644 thirdparty/eigen/test/unalignedassert.cpp create mode 100644 thirdparty/eigen/test/unalignedcount.cpp create mode 100644 thirdparty/eigen/test/upperbidiagonalization.cpp create mode 100644 thirdparty/eigen/test/vectorization_logic.cpp create mode 100644 thirdparty/eigen/test/vectorwiseop.cpp create mode 100644 thirdparty/eigen/test/visitor.cpp create mode 100644 thirdparty/eigen/test/zerosized.cpp create mode 100644 thirdparty/eigen/unsupported/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/Eigen/AdolcForward create mode 100644 thirdparty/eigen/unsupported/Eigen/AlignedVector3 create mode 100644 thirdparty/eigen/unsupported/Eigen/ArpackSupport create mode 100644 thirdparty/eigen/unsupported/Eigen/AutoDiff create mode 100644 thirdparty/eigen/unsupported/Eigen/BVH create mode 100644 thirdparty/eigen/unsupported/Eigen/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/Tensor create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/TensorSymmetry create 
mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/ThreadPool create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h create mode 100644 
thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h create mode 100644 
thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h create mode 100644 
thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/util/CXX11Meta.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/util/EmulateArray.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h create mode 100644 thirdparty/eigen/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h create mode 100644 
thirdparty/eigen/unsupported/Eigen/EulerAngles create mode 100644 thirdparty/eigen/unsupported/Eigen/FFT create mode 100644 thirdparty/eigen/unsupported/Eigen/IterativeSolvers create mode 100644 thirdparty/eigen/unsupported/Eigen/KroneckerProduct create mode 100644 thirdparty/eigen/unsupported/Eigen/LevenbergMarquardt create mode 100644 thirdparty/eigen/unsupported/Eigen/MPRealSupport create mode 100644 thirdparty/eigen/unsupported/Eigen/MatrixFunctions create mode 100644 thirdparty/eigen/unsupported/Eigen/MoreVectorization create mode 100644 thirdparty/eigen/unsupported/Eigen/NonLinearOptimization create mode 100644 thirdparty/eigen/unsupported/Eigen/NumericalDiff create mode 100644 thirdparty/eigen/unsupported/Eigen/OpenGLSupport create mode 100644 thirdparty/eigen/unsupported/Eigen/Polynomials create mode 100644 thirdparty/eigen/unsupported/Eigen/Skyline create mode 100644 thirdparty/eigen/unsupported/Eigen/SparseExtra create mode 100644 thirdparty/eigen/unsupported/Eigen/SpecialFunctions create mode 100644 thirdparty/eigen/unsupported/Eigen/Splines create mode 100644 thirdparty/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h create mode 100755 thirdparty/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/BVH/BVAlgorithms.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/BVH/KdBVH.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/EulerAngles/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/Eigen/src/EulerAngles/EulerAngles.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/EulerAngles/EulerSystem.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/FFT/ei_fftw_impl.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/FFT/ei_kissfft_impl.h create mode 100644 
thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/ConstrainedConjGrad.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/GMRES.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/IncompleteLU.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/IterationController.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/MINRES.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/IterativeSolvers/Scaling.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/LevenbergMarquardt/CopyrightMINPACK.txt create mode 100644 thirdparty/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMcovar.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMonestep.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMpar.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/LevenbergMarquardt/LMqrsolv.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/LevenbergMarquardt/LevenbergMarquardt.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MatrixFunctions/StemFunction.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/MoreVectorization/MathFunctions.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h create 
mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/chkder.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/covar.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/dogleg.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/fdjac1.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/lmpar.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/r1updt.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NonLinearOptimization/rwupdt.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/NumericalDiff/NumericalDiff.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Polynomials/Companion.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Polynomials/PolynomialSolver.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Polynomials/PolynomialUtils.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Skyline/SkylineInplaceLU.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Skyline/SkylineMatrix.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Skyline/SkylineProduct.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Skyline/SkylineStorage.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Skyline/SkylineUtil.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SparseExtra/BlockOfDynamicSparseMatrix.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h create mode 100644 
thirdparty/eigen/unsupported/Eigen/src/SparseExtra/DynamicSparseMatrix.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SparseExtra/MatrixMarketIterator.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SparseExtra/RandomSetter.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Splines/Spline.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Splines/SplineFitting.h create mode 100644 thirdparty/eigen/unsupported/Eigen/src/Splines/SplineFwd.h create mode 100644 thirdparty/eigen/unsupported/README.txt create mode 100644 thirdparty/eigen/unsupported/bench/bench_svd.cpp create mode 100644 thirdparty/eigen/unsupported/doc/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/doc/Overview.dox create mode 100644 thirdparty/eigen/unsupported/doc/eigendoxy_layout.xml.in create mode 100644 thirdparty/eigen/unsupported/doc/examples/BVH_Example.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/doc/examples/EulerAngles.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/FFT.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixExponential.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixFunction.cpp create mode 100644 
thirdparty/eigen/unsupported/doc/examples/MatrixLogarithm.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixPower.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixPower_optimal.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixSine.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixSinh.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/MatrixSquareRoot.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/PolynomialSolver1.cpp create mode 100644 thirdparty/eigen/unsupported/doc/examples/PolynomialUtils1.cpp create mode 100644 thirdparty/eigen/unsupported/doc/snippets/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/test/BVH.cpp create mode 100644 thirdparty/eigen/unsupported/test/CMakeLists.txt create mode 100644 thirdparty/eigen/unsupported/test/EulerAngles.cpp create mode 100644 thirdparty/eigen/unsupported/test/FFT.cpp create mode 100644 thirdparty/eigen/unsupported/test/FFTW.cpp create mode 100644 thirdparty/eigen/unsupported/test/NonLinearOptimization.cpp create mode 100644 thirdparty/eigen/unsupported/test/NumericalDiff.cpp create mode 100644 thirdparty/eigen/unsupported/test/alignedvector3.cpp create mode 100644 thirdparty/eigen/unsupported/test/autodiff.cpp create mode 100644 thirdparty/eigen/unsupported/test/autodiff_scalar.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_eventcount.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_meta.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_runqueue.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_argmax.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_assign.cpp create mode 100644 
thirdparty/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_broadcasting.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_casts.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_chipping.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_comparisons.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_concatenation.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_const.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_contraction.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_convolution.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_custom_index.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_custom_op.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_device.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_dimension.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_empty.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_expr.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_fft.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp create mode 100644 
thirdparty/eigen/unsupported/test/cxx11_tensor_forced_eval.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_generator.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_ifft.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_image_patch.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_index_list.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_inflation.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_intdiv.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_io.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_layout_swap.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_lvalue.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_map.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_math.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_mixed_indices.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_morphing.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_notification.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_of_complex.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_of_const_values.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_of_strings.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_padding.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_patch.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_random.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_random_cuda.cu create mode 100644 
thirdparty/eigen/unsupported/test/cxx11_tensor_reduction.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_ref.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_reverse.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_roundings.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_scan.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_shuffling.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_simple.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_striding.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_sugar.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_sycl.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_symmetry.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_thread_pool.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_uint128.cpp create mode 100644 thirdparty/eigen/unsupported/test/cxx11_tensor_volume_patch.cpp create mode 100644 thirdparty/eigen/unsupported/test/dgmres.cpp create mode 100644 thirdparty/eigen/unsupported/test/forward_adolc.cpp create mode 100644 thirdparty/eigen/unsupported/test/gmres.cpp create mode 100644 thirdparty/eigen/unsupported/test/kronecker_product.cpp create mode 100644 thirdparty/eigen/unsupported/test/levenberg_marquardt.cpp create mode 100644 thirdparty/eigen/unsupported/test/matrix_exponential.cpp create mode 100644 thirdparty/eigen/unsupported/test/matrix_function.cpp create mode 100644 thirdparty/eigen/unsupported/test/matrix_functions.h create mode 100644 thirdparty/eigen/unsupported/test/matrix_power.cpp create mode 100644 
thirdparty/eigen/unsupported/test/matrix_square_root.cpp create mode 100644 thirdparty/eigen/unsupported/test/minres.cpp create mode 100644 thirdparty/eigen/unsupported/test/mpreal/mpreal.h create mode 100644 thirdparty/eigen/unsupported/test/mpreal_support.cpp create mode 100644 thirdparty/eigen/unsupported/test/openglsupport.cpp create mode 100644 thirdparty/eigen/unsupported/test/polynomialsolver.cpp create mode 100644 thirdparty/eigen/unsupported/test/polynomialutils.cpp create mode 100644 thirdparty/eigen/unsupported/test/sparse_extra.cpp create mode 100644 thirdparty/eigen/unsupported/test/special_functions.cpp create mode 100644 thirdparty/eigen/unsupported/test/splines.cpp diff --git a/thirdparty/eigen-3.2.10/COPYING.BSD b/thirdparty/eigen-3.2.10/COPYING.BSD index 11971ffe2..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/COPYING.BSD +++ b/thirdparty/eigen-3.2.10/COPYING.BSD @@ -1,26 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ \ No newline at end of file diff --git a/thirdparty/eigen-3.2.10/Eigen/Array b/thirdparty/eigen-3.2.10/Eigen/Array index 3d004fb69..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/Array +++ b/thirdparty/eigen-3.2.10/Eigen/Array @@ -1,11 +0,0 @@ -#ifndef EIGEN_ARRAY_MODULE_H -#define EIGEN_ARRAY_MODULE_H - -// include Core first to handle Eigen2 support macros -#include "Core" - -#ifndef EIGEN2_SUPPORT - #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core. 
-#endif - -#endif // EIGEN_ARRAY_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/CMakeLists.txt index a92dd6f6c..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/CMakeLists.txt @@ -1,19 +0,0 @@ -include(RegexUtils) -test_escape_string_as_regex() - -file(GLOB Eigen_directory_files "*") - -escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") - -foreach(f ${Eigen_directory_files}) - if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src") - list(APPEND Eigen_directory_files_to_install ${f}) - endif() -endforeach(f ${Eigen_directory_files}) - -install(FILES - ${Eigen_directory_files_to_install} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel - ) - -add_subdirectory(src) diff --git a/thirdparty/eigen-3.2.10/Eigen/CholmodSupport b/thirdparty/eigen-3.2.10/Eigen/CholmodSupport index 88c29a646..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/CholmodSupport +++ b/thirdparty/eigen-3.2.10/Eigen/CholmodSupport @@ -1,45 +0,0 @@ -#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H -#define EIGEN_CHOLMODSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -extern "C" { - #include -} - -/** \ingroup Support_modules - * \defgroup CholmodSupport_Module CholmodSupport module - * - * This module provides an interface to the Cholmod library which is part of the suitesparse package. - * It provides the two following main factorization classes: - * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. - * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). 
- * - * For the sake of completeness, this module also propose the two following classes: - * - class CholmodSimplicialLLT - * - class CholmodSimplicialLDLT - * Note that these classes does not bring any particular advantage compared to the built-in - * SimplicialLLT and SimplicialLDLT factorization classes. - * - * \code - * #include - * \endcode - * - * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies. - * The dependencies depend on how cholmod has been compiled. - * For a cmake based project, you can use our FindCholmod.cmake module to help you in this task. - * - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -#include "src/CholmodSupport/CholmodSupport.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_CHOLMODSUPPORT_MODULE_H - diff --git a/thirdparty/eigen-3.2.10/Eigen/Eigen b/thirdparty/eigen-3.2.10/Eigen/Eigen index 19b40ea4e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/Eigen +++ b/thirdparty/eigen-3.2.10/Eigen/Eigen @@ -1,2 +0,0 @@ -#include "Dense" -//#include "Sparse" diff --git a/thirdparty/eigen-3.2.10/Eigen/Eigen2Support b/thirdparty/eigen-3.2.10/Eigen/Eigen2Support index 6aa009d20..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/Eigen2Support +++ b/thirdparty/eigen-3.2.10/Eigen/Eigen2Support @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2SUPPORT_H -#define EIGEN2SUPPORT_H - -#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) -#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header -#endif - -#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) -#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)" -#else -#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)") -#endif - -#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#include "src/Core/util/DisableStupidWarnings.h" - -/** \ingroup Support_modules - * \defgroup Eigen2Support_Module Eigen2 support module - * - * \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. - * - * This module provides a couple of deprecated functions improving the compatibility with Eigen2. 
- * - * To use it, define EIGEN2_SUPPORT before including any Eigen header - * \code - * #define EIGEN2_SUPPORT - * \endcode - * - */ - -#include "src/Eigen2Support/Macros.h" -#include "src/Eigen2Support/Memory.h" -#include "src/Eigen2Support/Meta.h" -#include "src/Eigen2Support/Lazy.h" -#include "src/Eigen2Support/Cwise.h" -#include "src/Eigen2Support/CwiseOperators.h" -#include "src/Eigen2Support/TriangularSolver.h" -#include "src/Eigen2Support/Block.h" -#include "src/Eigen2Support/VectorBlock.h" -#include "src/Eigen2Support/Minor.h" -#include "src/Eigen2Support/MathFunctions.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -// Eigen2 used to include iostream -#include - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ -using Eigen::Matrix##SizeSuffix##TypeSuffix; \ -using Eigen::Vector##SizeSuffix##TypeSuffix; \ -using Eigen::RowVector##SizeSuffix##TypeSuffix; - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ - -#define EIGEN_USING_MATRIX_TYPEDEFS \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd) - -#define USING_PART_OF_NAMESPACE_EIGEN \ -EIGEN_USING_MATRIX_TYPEDEFS \ -using Eigen::Matrix; \ -using Eigen::MatrixBase; \ -using Eigen::ei_random; \ -using Eigen::ei_real; \ -using Eigen::ei_imag; \ -using Eigen::ei_conj; \ -using Eigen::ei_abs; \ -using Eigen::ei_abs2; \ -using Eigen::ei_sqrt; \ -using Eigen::ei_exp; \ -using Eigen::ei_log; \ -using Eigen::ei_sin; \ -using Eigen::ei_cos; - -#endif // EIGEN2SUPPORT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/IterativeLinearSolvers 
b/thirdparty/eigen-3.2.10/Eigen/IterativeLinearSolvers index 0f4159dc1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/IterativeLinearSolvers +++ b/thirdparty/eigen-3.2.10/Eigen/IterativeLinearSolvers @@ -1,40 +0,0 @@ -#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H -#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H - -#include "SparseCore" -#include "OrderingMethods" - -#include "src/Core/util/DisableStupidWarnings.h" - -/** - * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module - * - * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. - * Those solvers are accessible via the following classes: - * - ConjugateGradient for selfadjoint (hermitian) matrices, - * - BiCGSTAB for general square matrices. - * - * These iterative solvers are associated with some preconditioners: - * - IdentityPreconditioner - not really useful - * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. - * - IncompleteILUT - incomplete LU factorization with dual thresholding - * - * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. 
- * - * \code - * #include - * \endcode - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -#include "src/IterativeLinearSolvers/IterativeSolverBase.h" -#include "src/IterativeLinearSolvers/BasicPreconditioners.h" -#include "src/IterativeLinearSolvers/ConjugateGradient.h" -#include "src/IterativeLinearSolvers/BiCGSTAB.h" -#include "src/IterativeLinearSolvers/IncompleteLUT.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/LeastSquares b/thirdparty/eigen-3.2.10/Eigen/LeastSquares index 35137c25d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/LeastSquares +++ b/thirdparty/eigen-3.2.10/Eigen/LeastSquares @@ -1,32 +0,0 @@ -#ifndef EIGEN_REGRESSION_MODULE_H -#define EIGEN_REGRESSION_MODULE_H - -#ifndef EIGEN2_SUPPORT -#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT) -#endif - -// exclude from normal eigen3-only documentation -#ifdef EIGEN2_SUPPORT - -#include "Core" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include "Eigenvalues" -#include "Geometry" - -/** \defgroup LeastSquares_Module LeastSquares module - * This module provides linear regression and related features. 
- * - * \code - * #include - * \endcode - */ - -#include "src/Eigen2Support/LeastSquares.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN2_SUPPORT - -#endif // EIGEN_REGRESSION_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/MetisSupport b/thirdparty/eigen-3.2.10/Eigen/MetisSupport index 6a113f7a8..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/MetisSupport +++ b/thirdparty/eigen-3.2.10/Eigen/MetisSupport @@ -1,28 +0,0 @@ -#ifndef EIGEN_METISSUPPORT_MODULE_H -#define EIGEN_METISSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -extern "C" { -#include -} - - -/** \ingroup Support_modules - * \defgroup MetisSupport_Module MetisSupport module - * - * \code - * #include - * \endcode - * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). - * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink - */ - - -#include "src/MetisSupport/MetisSupport.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/OrderingMethods b/thirdparty/eigen-3.2.10/Eigen/OrderingMethods index 7c0f1ffff..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/OrderingMethods +++ b/thirdparty/eigen-3.2.10/Eigen/OrderingMethods @@ -1,66 +0,0 @@ -#ifndef EIGEN_ORDERINGMETHODS_MODULE_H -#define EIGEN_ORDERINGMETHODS_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -/** - * \defgroup OrderingMethods_Module OrderingMethods module - * - * This module is currently for internal use only - * - * It defines various built-in and external ordering methods for sparse matrices. - * They are typically used to reduce the number of elements during - * the sparse matrix decomposition (LLT, LU, QR). 
- * Precisely, in a preprocessing step, a permutation matrix P is computed using - * those ordering methods and applied to the columns of the matrix. - * Using for instance the sparse Cholesky decomposition, it is expected that - * the nonzeros elements in LLT(A*P) will be much smaller than that in LLT(A). - * - * - * Usage : - * \code - * #include - * \endcode - * - * A simple usage is as a template parameter in the sparse decomposition classes : - * - * \code - * SparseLU > solver; - * \endcode - * - * \code - * SparseQR > solver; - * \endcode - * - * It is possible as well to call directly a particular ordering method for your own purpose, - * \code - * AMDOrdering ordering; - * PermutationMatrix perm; - * SparseMatrix A; - * //Fill the matrix ... - * - * ordering(A, perm); // Call AMD - * \endcode - * - * \note Some of these methods (like AMD or METIS), need the sparsity pattern - * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, - * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. - * If your matrix is already symmetric (at leat in structure), you can avoid that - * by calling the method with a SelfAdjointView type. 
- * - * \code - * // Call the ordering on the pattern of the lower triangular matrix A - * ordering(A.selfadjointView(), perm); - * \endcode - */ - -#ifndef EIGEN_MPL2_ONLY -#include "src/OrderingMethods/Amd.h" -#endif - -#include "src/OrderingMethods/Ordering.h" -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/PaStiXSupport b/thirdparty/eigen-3.2.10/Eigen/PaStiXSupport index 7c616ee5e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/PaStiXSupport +++ b/thirdparty/eigen-3.2.10/Eigen/PaStiXSupport @@ -1,46 +0,0 @@ -#ifndef EIGEN_PASTIXSUPPORT_MODULE_H -#define EIGEN_PASTIXSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include -extern "C" { -#include -#include -} - -#ifdef complex -#undef complex -#endif - -/** \ingroup Support_modules - * \defgroup PaStiXSupport_Module PaStiXSupport module - * - * This module provides an interface to the PaSTiX library. - * PaSTiX is a general \b supernodal, \b parallel and \b opensource sparse solver. - * It provides the two following main factorization classes: - * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization. - * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization. - * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern). - * - * \code - * #include - * \endcode - * - * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be linked to the PaSTiX library and its dependencies. - * The dependencies depend on how PaSTiX has been compiled. - * For a cmake based project, you can use our FindPaSTiX.cmake module to help you in this task. 
- * - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -#include "src/PaStiXSupport/PaStiXSupport.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/PardisoSupport b/thirdparty/eigen-3.2.10/Eigen/PardisoSupport index 99330ce7a..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/PardisoSupport +++ b/thirdparty/eigen-3.2.10/Eigen/PardisoSupport @@ -1,30 +0,0 @@ -#ifndef EIGEN_PARDISOSUPPORT_MODULE_H -#define EIGEN_PARDISOSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include - -#include - -/** \ingroup Support_modules - * \defgroup PardisoSupport_Module PardisoSupport module - * - * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers. - * - * \code - * #include - * \endcode - * - * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be linked to the MKL library and its dependencies. - * See this \ref TopicUsingIntelMKL "page" for more information on MKL-Eigen integration. 
- * - */ - -#include "src/PardisoSupport/PardisoSupport.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/QtAlignedMalloc b/thirdparty/eigen-3.2.10/Eigen/QtAlignedMalloc index 46f7d83b7..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/QtAlignedMalloc +++ b/thirdparty/eigen-3.2.10/Eigen/QtAlignedMalloc @@ -1,34 +0,0 @@ - -#ifndef EIGEN_QTMALLOC_MODULE_H -#define EIGEN_QTMALLOC_MODULE_H - -#include "Core" - -#if (!EIGEN_MALLOC_ALREADY_ALIGNED) - -#include "src/Core/util/DisableStupidWarnings.h" - -void *qMalloc(size_t size) -{ - return Eigen::internal::aligned_malloc(size); -} - -void qFree(void *ptr) -{ - Eigen::internal::aligned_free(ptr); -} - -void *qRealloc(void *ptr, size_t size) -{ - void* newPtr = Eigen::internal::aligned_malloc(size); - memcpy(newPtr, ptr, size); - Eigen::internal::aligned_free(ptr); - return newPtr; -} - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif - -#endif // EIGEN_QTMALLOC_MODULE_H -/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen-3.2.10/Eigen/SPQRSupport b/thirdparty/eigen-3.2.10/Eigen/SPQRSupport index 7f1eb4770..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/SPQRSupport +++ b/thirdparty/eigen-3.2.10/Eigen/SPQRSupport @@ -1,29 +0,0 @@ -#ifndef EIGEN_SPQRSUPPORT_MODULE_H -#define EIGEN_SPQRSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include "SuiteSparseQR.hpp" - -/** \ingroup Support_modules - * \defgroup SPQRSupport_Module SuiteSparseQR module - * - * This module provides an interface to the SPQR library, which is part of the suitesparse package. - * - * \code - * #include - * \endcode - * - * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...). 
- * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules - * - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" -#include "src/CholmodSupport/CholmodSupport.h" -#include "src/SPQRSupport/SuiteSparseQRSupport.h" - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/Sparse b/thirdparty/eigen-3.2.10/Eigen/Sparse index 7cc9c0913..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/Sparse +++ b/thirdparty/eigen-3.2.10/Eigen/Sparse @@ -1,27 +0,0 @@ -#ifndef EIGEN_SPARSE_MODULE_H -#define EIGEN_SPARSE_MODULE_H - -/** \defgroup Sparse_Module Sparse meta-module - * - * Meta-module including all related modules: - * - \ref SparseCore_Module - * - \ref OrderingMethods_Module - * - \ref SparseCholesky_Module - * - \ref SparseLU_Module - * - \ref SparseQR_Module - * - \ref IterativeLinearSolvers_Module - * - * \code - * #include - * \endcode - */ - -#include "SparseCore" -#include "OrderingMethods" -#include "SparseCholesky" -#include "SparseLU" -#include "SparseQR" -#include "IterativeLinearSolvers" - -#endif // EIGEN_SPARSE_MODULE_H - diff --git a/thirdparty/eigen-3.2.10/Eigen/SparseCholesky b/thirdparty/eigen-3.2.10/Eigen/SparseCholesky index 9f5056aa1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/SparseCholesky +++ b/thirdparty/eigen-3.2.10/Eigen/SparseCholesky @@ -1,47 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2013 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSECHOLESKY_MODULE_H -#define EIGEN_SPARSECHOLESKY_MODULE_H - -#include "SparseCore" -#include "OrderingMethods" - -#include "src/Core/util/DisableStupidWarnings.h" - -/** - * \defgroup SparseCholesky_Module SparseCholesky module - * - * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices. - * Those decompositions are accessible via the following classes: - * - SimplicialLLt, - * - SimplicialLDLt - * - * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module. - * - * \code - * #include - * \endcode - */ - -#ifdef EIGEN_MPL2_ONLY -#error The SparseCholesky module has nothing to offer in MPL2 only mode -#endif - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" -#include "src/SparseCholesky/SimplicialCholesky.h" - -#ifndef EIGEN_MPL2_ONLY -#include "src/SparseCholesky/SimplicialCholesky_impl.h" -#endif - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SPARSECHOLESKY_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/SparseCore b/thirdparty/eigen-3.2.10/Eigen/SparseCore index 24bcf0156..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/SparseCore +++ b/thirdparty/eigen-3.2.10/Eigen/SparseCore @@ -1,64 +0,0 @@ -#ifndef EIGEN_SPARSECORE_MODULE_H -#define EIGEN_SPARSECORE_MODULE_H - -#include "Core" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include -#include -#include -#include -#include - -/** - * \defgroup SparseCore_Module SparseCore module - * - * This module provides a sparse matrix representation, and basic associated matrix manipulations - * and operations. - * - * See the \ref TutorialSparse "Sparse tutorial" - * - * \code - * #include - * \endcode - * - * This module depends on: Core. - */ - -namespace Eigen { - -/** The type used to identify a general sparse storage. 
*/ -struct Sparse {}; - -} - -#include "src/SparseCore/SparseUtil.h" -#include "src/SparseCore/SparseMatrixBase.h" -#include "src/SparseCore/CompressedStorage.h" -#include "src/SparseCore/AmbiVector.h" -#include "src/SparseCore/SparseMatrix.h" -#include "src/SparseCore/MappedSparseMatrix.h" -#include "src/SparseCore/SparseVector.h" -#include "src/SparseCore/SparseBlock.h" -#include "src/SparseCore/SparseTranspose.h" -#include "src/SparseCore/SparseCwiseUnaryOp.h" -#include "src/SparseCore/SparseCwiseBinaryOp.h" -#include "src/SparseCore/SparseDot.h" -#include "src/SparseCore/SparsePermutation.h" -#include "src/SparseCore/SparseRedux.h" -#include "src/SparseCore/SparseFuzzy.h" -#include "src/SparseCore/ConservativeSparseSparseProduct.h" -#include "src/SparseCore/SparseSparseProductWithPruning.h" -#include "src/SparseCore/SparseProduct.h" -#include "src/SparseCore/SparseDenseProduct.h" -#include "src/SparseCore/SparseDiagonalProduct.h" -#include "src/SparseCore/SparseTriangularView.h" -#include "src/SparseCore/SparseSelfAdjointView.h" -#include "src/SparseCore/TriangularSolver.h" -#include "src/SparseCore/SparseView.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SPARSECORE_MODULE_H - diff --git a/thirdparty/eigen-3.2.10/Eigen/SparseLU b/thirdparty/eigen-3.2.10/Eigen/SparseLU index 8527a49bd..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/SparseLU +++ b/thirdparty/eigen-3.2.10/Eigen/SparseLU @@ -1,49 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSELU_MODULE_H -#define EIGEN_SPARSELU_MODULE_H - -#include "SparseCore" - -/** - * \defgroup SparseLU_Module SparseLU module - * This module defines a supernodal factorization of general sparse matrices. - * The code is fully optimized for supernode-panel updates with specialized kernels. - * Please, see the documentation of the SparseLU class for more details. - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -// Ordering interface -#include "OrderingMethods" - -#include "src/SparseLU/SparseLU_gemm_kernel.h" - -#include "src/SparseLU/SparseLU_Structs.h" -#include "src/SparseLU/SparseLU_SupernodalMatrix.h" -#include "src/SparseLU/SparseLUImpl.h" -#include "src/SparseCore/SparseColEtree.h" -#include "src/SparseLU/SparseLU_Memory.h" -#include "src/SparseLU/SparseLU_heap_relax_snode.h" -#include "src/SparseLU/SparseLU_relax_snode.h" -#include "src/SparseLU/SparseLU_pivotL.h" -#include "src/SparseLU/SparseLU_panel_dfs.h" -#include "src/SparseLU/SparseLU_kernel_bmod.h" -#include "src/SparseLU/SparseLU_panel_bmod.h" -#include "src/SparseLU/SparseLU_column_dfs.h" -#include "src/SparseLU/SparseLU_column_bmod.h" -#include "src/SparseLU/SparseLU_copy_to_ucol.h" -#include "src/SparseLU/SparseLU_pruneL.h" -#include "src/SparseLU/SparseLU_Utils.h" -#include "src/SparseLU/SparseLU.h" - -#endif // EIGEN_SPARSELU_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/SparseQR b/thirdparty/eigen-3.2.10/Eigen/SparseQR index 4ee42065e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/SparseQR +++ b/thirdparty/eigen-3.2.10/Eigen/SparseQR @@ -1,33 +0,0 @@ -#ifndef EIGEN_SPARSEQR_MODULE_H -#define EIGEN_SPARSEQR_MODULE_H - -#include "SparseCore" -#include "OrderingMethods" -#include "src/Core/util/DisableStupidWarnings.h" - -/** \defgroup SparseQR_Module SparseQR module - * \brief Provides QR decomposition for sparse matrices - * - * This module provides a simplicial version of the left-looking Sparse QR decomposition. 
- * The columns of the input matrix should be reordered to limit the fill-in during the - * decomposition. Built-in methods (COLAMD, AMD) or external methods (METIS) can be used to this end. - * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list - * of built-in and external ordering methods. - * - * \code - * #include - * \endcode - * - * - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -#include "OrderingMethods" -#include "src/SparseCore/SparseColEtree.h" -#include "src/SparseQR/SparseQR.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/StdDeque b/thirdparty/eigen-3.2.10/Eigen/StdDeque index f27234778..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/StdDeque +++ b/thirdparty/eigen-3.2.10/Eigen/StdDeque @@ -1,27 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STDDEQUE_MODULE_H -#define EIGEN_STDDEQUE_MODULE_H - -#include "Core" -#include - -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ - -#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) - -#else - -#include "src/StlSupport/StdDeque.h" - -#endif - -#endif // EIGEN_STDDEQUE_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/StdList b/thirdparty/eigen-3.2.10/Eigen/StdList index 225c1e18f..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/StdList +++ b/thirdparty/eigen-3.2.10/Eigen/StdList @@ -1,26 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STDLIST_MODULE_H -#define EIGEN_STDLIST_MODULE_H - -#include "Core" -#include - -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ - -#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) - -#else - -#include "src/StlSupport/StdList.h" - -#endif - -#endif // EIGEN_STDLIST_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/StdVector b/thirdparty/eigen-3.2.10/Eigen/StdVector index 6b22627f6..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/StdVector +++ b/thirdparty/eigen-3.2.10/Eigen/StdVector @@ -1,27 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STDVECTOR_MODULE_H -#define EIGEN_STDVECTOR_MODULE_H - -#include "Core" -#include - -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ - -#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) 
- -#else - -#include "src/StlSupport/StdVector.h" - -#endif - -#endif // EIGEN_STDVECTOR_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/SuperLUSupport b/thirdparty/eigen-3.2.10/Eigen/SuperLUSupport index 575e14fbc..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/SuperLUSupport +++ b/thirdparty/eigen-3.2.10/Eigen/SuperLUSupport @@ -1,59 +0,0 @@ -#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H -#define EIGEN_SUPERLUSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -#ifdef EMPTY -#define EIGEN_EMPTY_WAS_ALREADY_DEFINED -#endif - -typedef int int_t; -#include -#include -#include - -// slu_util.h defines a preprocessor token named EMPTY which is really polluting, -// so we remove it in favor of a SUPERLU_EMPTY token. -// If EMPTY was already defined then we don't undef it. - -#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED) -# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED -#elif defined(EMPTY) -# undef EMPTY -#endif - -#define SUPERLU_EMPTY (-1) - -namespace Eigen { struct SluMatrix; } - -/** \ingroup Support_modules - * \defgroup SuperLUSupport_Module SuperLUSupport module - * - * This module provides an interface to the SuperLU library. - * It provides the following factorization class: - * - class SuperLU: a supernodal sequential LU factorization. - * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). - * - * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. - * - * \code - * #include - * \endcode - * - * In order to use this module, the superlu headers must be accessible from the include paths, and your binary must be linked to the superlu library and its dependencies. - * The dependencies depend on how superlu has been compiled. - * For a cmake based project, you can use our FindSuperLU.cmake module to help you in this task. 
- * - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -#include "src/SuperLUSupport/SuperLUSupport.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/UmfPackSupport b/thirdparty/eigen-3.2.10/Eigen/UmfPackSupport index 7b1b66064..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/UmfPackSupport +++ b/thirdparty/eigen-3.2.10/Eigen/UmfPackSupport @@ -1,36 +0,0 @@ -#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H -#define EIGEN_UMFPACKSUPPORT_MODULE_H - -#include "SparseCore" - -#include "src/Core/util/DisableStupidWarnings.h" - -extern "C" { -#include -} - -/** \ingroup Support_modules - * \defgroup UmfPackSupport_Module UmfPackSupport module - * - * This module provides an interface to the UmfPack library which is part of the suitesparse package. - * It provides the following factorization class: - * - class UmfPackLU: a multifrontal sequential LU factorization. - * - * \code - * #include - * \endcode - * - * In order to use this module, the umfpack headers must be accessible from the include paths, and your binary must be linked to the umfpack library and its dependencies. - * The dependencies depend on how umfpack has been compiled. - * For a cmake based project, you can use our FindUmfPack.cmake module to help you in this task. 
- * - */ - -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - -#include "src/UmfPackSupport/UmfPackSupport.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN_UMFPACKSUPPORT_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/CMakeLists.txt index c326f374d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/CMakeLists.txt @@ -1,7 +0,0 @@ -file(GLOB Eigen_src_subdirectories "*") -escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -foreach(f ${Eigen_src_subdirectories}) - if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" ) - add_subdirectory(${f}) - endif() -endforeach() diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/CMakeLists.txt index d01488b41..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Cholesky_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Cholesky_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/LLT_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/LLT_MKL.h index 66675d747..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/LLT_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Cholesky/LLT_MKL.h @@ -1,102 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * LLt decomposition based on LAPACKE_?potrf function. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_LLT_MKL_H -#define EIGEN_LLT_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" -#include - -namespace Eigen { - -namespace internal { - -template struct mkl_llt; - -#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template<> struct mkl_llt \ -{ \ - template \ - static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ - { \ - lapack_int matrix_order; \ - lapack_int size, lda, info, StorageOrder; \ - EIGTYPE* a; \ - eigen_assert(m.rows()==m.cols()); \ - /* Set up parameters for ?potrf */ \ - size = m.rows(); \ - StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - a = &(m.coeffRef(0,0)); \ - lda = m.outerStride(); \ -\ - info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ - info = (info==0) ? -1 : info>0 ? info-1 : size; \ - return info; \ - } \ -}; \ -template<> struct llt_inplace \ -{ \ - template \ - static typename MatrixType::Index blocked(MatrixType& m) \ - { \ - return mkl_llt::potrf(m, 'L'); \ - } \ - template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ - { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ -}; \ -template<> struct llt_inplace \ -{ \ - template \ - static typename MatrixType::Index blocked(MatrixType& m) \ - { \ - return mkl_llt::potrf(m, 'U'); \ - } \ - template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ - { \ - Transpose matt(mat); \ - return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ - } \ -}; - -EIGEN_MKL_LLT(double, double, d) -EIGEN_MKL_LLT(float, float, s) -EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) - -} // end namespace 
internal - -} // end namespace Eigen - -#endif // EIGEN_LLT_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CMakeLists.txt index 814dfa613..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CholmodSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CholmodSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CholmodSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CholmodSupport.h index 99dbe171c..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/CholmodSupport/CholmodSupport.h @@ -1,607 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_CHOLMODSUPPORT_H -#define EIGEN_CHOLMODSUPPORT_H - -namespace Eigen { - -namespace internal { - -template -void cholmod_configure_matrix(CholmodType& mat) -{ - if (internal::is_same::value) - { - mat.xtype = CHOLMOD_REAL; - mat.dtype = CHOLMOD_SINGLE; - } - else if (internal::is_same::value) - { - mat.xtype = CHOLMOD_REAL; - mat.dtype = CHOLMOD_DOUBLE; - } - else if (internal::is_same >::value) - { - mat.xtype = CHOLMOD_COMPLEX; - mat.dtype = CHOLMOD_SINGLE; - } - else if (internal::is_same >::value) - { - mat.xtype = CHOLMOD_COMPLEX; - mat.dtype = CHOLMOD_DOUBLE; - } - else - { - eigen_assert(false && "Scalar type not supported by CHOLMOD"); - } -} - -} // namespace internal - -/** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. - * Note that the data are shared. - */ -template -cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) -{ - cholmod_sparse res; - res.nzmax = mat.nonZeros(); - res.nrow = mat.rows();; - res.ncol = mat.cols(); - res.p = mat.outerIndexPtr(); - res.i = mat.innerIndexPtr(); - res.x = mat.valuePtr(); - res.z = 0; - res.sorted = 1; - if(mat.isCompressed()) - { - res.packed = 1; - res.nz = 0; - } - else - { - res.packed = 0; - res.nz = mat.innerNonZeroPtr(); - } - - res.dtype = 0; - res.stype = -1; - - if (internal::is_same<_Index,int>::value) - { - res.itype = CHOLMOD_INT; - } - else if (internal::is_same<_Index,SuiteSparse_long>::value) - { - res.itype = CHOLMOD_LONG; - } - else - { - eigen_assert(false && "Index type not supported yet"); - } - - // setup res.xtype - internal::cholmod_configure_matrix<_Scalar>(res); - - res.stype = 0; - - return res; -} - -template -const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat) -{ - cholmod_sparse res = viewAsCholmod(mat.const_cast_derived()); - return res; -} - -/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. - * The data are not copied but shared. 
*/ -template -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) -{ - cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); - - if(UpLo==Upper) res.stype = 1; - if(UpLo==Lower) res.stype = -1; - - return res; -} - -/** Returns a view of the Eigen \b dense matrix \a mat as Cholmod dense matrix. - * The data are not copied but shared. */ -template -cholmod_dense viewAsCholmod(MatrixBase& mat) -{ - EIGEN_STATIC_ASSERT((internal::traits::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - typedef typename Derived::Scalar Scalar; - - cholmod_dense res; - res.nrow = mat.rows(); - res.ncol = mat.cols(); - res.nzmax = res.nrow * res.ncol; - res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride(); - res.x = (void*)(mat.derived().data()); - res.z = 0; - - internal::cholmod_configure_matrix(res); - - return res; -} - -/** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. - * The data are not copied but shared. 
*/ -template -MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) -{ - return MappedSparseMatrix - (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], - static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); -} - -enum CholmodMode { - CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt -}; - - -/** \ingroup CholmodSupport_Module - * \class CholmodBase - * \brief The base class for the direct Cholesky factorization of Cholmod - * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT - */ -template -class CholmodBase : internal::noncopyable -{ - public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef MatrixType CholMatrixType; - typedef typename MatrixType::Index Index; - - public: - - CholmodBase() - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) - { - m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); - cholmod_start(&m_cholmod); - } - - CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) - { - m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); - cholmod_start(&m_cholmod); - compute(matrix); - } - - ~CholmodBase() - { - if(m_cholmodFactor) - cholmod_free_factor(&m_cholmodFactor, &m_cholmod); - cholmod_finish(&m_cholmod); - } - - inline Index cols() const { return m_cholmodFactor->n; } - inline Index rows() const { return m_cholmodFactor->n; } - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. 
- */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - /** Computes the sparse Cholesky decomposition of \a matrix */ - Derived& compute(const MatrixType& matrix) - { - analyzePattern(matrix); - factorize(matrix); - return derived(); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. 
- * - * \sa factorize() - */ - void analyzePattern(const MatrixType& matrix) - { - if(m_cholmodFactor) - { - cholmod_free_factor(&m_cholmodFactor, &m_cholmod); - m_cholmodFactor = 0; - } - cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); - m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); - - this->m_isInitialized = true; - this->m_info = Success; - m_analysisIsOk = true; - m_factorizationIsOk = false; - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. - * - * \sa analyzePattern() - */ - void factorize(const MatrixType& matrix) - { - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); - cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); - - // If the factorization failed, minor is the column at which it did. On success minor == n. - this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); - m_factorizationIsOk = true; - } - - /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations. - * See the Cholmod user guide for details. 
*/ - cholmod_common& cholmod() { return m_cholmod; } - - #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal */ - template - void _solve(const MatrixBase &b, MatrixBase &dest) const - { - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); - const Index size = m_cholmodFactor->n; - EIGEN_UNUSED_VARIABLE(size); - eigen_assert(size==b.rows()); - - // note: cd stands for Cholmod Dense - Rhs& b_ref(b.const_cast_derived()); - cholmod_dense b_cd = viewAsCholmod(b_ref); - cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); - if(!x_cd) - { - this->m_info = NumericalIssue; - } - // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); - cholmod_free_dense(&x_cd, &m_cholmod); - } - - /** \internal */ - template - void _solve(const SparseMatrix &b, SparseMatrix &dest) const - { - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); - const Index size = m_cholmodFactor->n; - EIGEN_UNUSED_VARIABLE(size); - eigen_assert(size==b.rows()); - - // note: cs stands for Cholmod Sparse - cholmod_sparse b_cs = viewAsCholmod(b); - cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); - if(!x_cs) - { - this->m_info = NumericalIssue; - } - // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - dest = viewAsEigen(*x_cs); - cholmod_free_sparse(&x_cs, &m_cholmod); - } - #endif // EIGEN_PARSED_BY_DOXYGEN - - - /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization. - * - * During the numerical factorization, an offset term is added to the diagonal coefficients:\n - * \c d_ii = \a offset + \c d_ii - * - * The default is \a offset=0. 
- * - * \returns a reference to \c *this. - */ - Derived& setShift(const RealScalar& offset) - { - m_shiftOffset[0] = offset; - return derived(); - } - - template - void dumpMemory(Stream& /*s*/) - {} - - protected: - mutable cholmod_common m_cholmod; - cholmod_factor* m_cholmodFactor; - RealScalar m_shiftOffset[2]; - mutable ComputationInfo m_info; - bool m_isInitialized; - int m_factorizationIsOk; - int m_analysisIsOk; -}; - -/** \ingroup CholmodSupport_Module - * \class CholmodSimplicialLLT - * \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod - * - * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization - * using the Cholmod library. - * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest. - * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices - * X and B can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * - * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
- * - * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT - */ -template -class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > -{ - typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base; - using Base::m_cholmod; - - public: - - typedef _MatrixType MatrixType; - - CholmodSimplicialLLT() : Base() { init(); } - - CholmodSimplicialLLT(const MatrixType& matrix) : Base() - { - init(); - Base::compute(matrix); - } - - ~CholmodSimplicialLLT() {} - protected: - void init() - { - m_cholmod.final_asis = 0; - m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; - m_cholmod.final_ll = 1; - } -}; - - -/** \ingroup CholmodSupport_Module - * \class CholmodSimplicialLDLT - * \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod - * - * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization - * using the Cholmod library. - * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest. - * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices - * X and B can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * - * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
- * - * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT - */ -template -class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > -{ - typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base; - using Base::m_cholmod; - - public: - - typedef _MatrixType MatrixType; - - CholmodSimplicialLDLT() : Base() { init(); } - - CholmodSimplicialLDLT(const MatrixType& matrix) : Base() - { - init(); - Base::compute(matrix); - } - - ~CholmodSimplicialLDLT() {} - protected: - void init() - { - m_cholmod.final_asis = 1; - m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; - } -}; - -/** \ingroup CholmodSupport_Module - * \class CholmodSupernodalLLT - * \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod - * - * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization - * using the Cholmod library. - * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. - * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices - * X and B can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * - * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
- * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > -{ - typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base; - using Base::m_cholmod; - - public: - - typedef _MatrixType MatrixType; - - CholmodSupernodalLLT() : Base() { init(); } - - CholmodSupernodalLLT(const MatrixType& matrix) : Base() - { - init(); - Base::compute(matrix); - } - - ~CholmodSupernodalLLT() {} - protected: - void init() - { - m_cholmod.final_asis = 1; - m_cholmod.supernodal = CHOLMOD_SUPERNODAL; - } -}; - -/** \ingroup CholmodSupport_Module - * \class CholmodDecomposition - * \brief A general Cholesky factorization and solver based on Cholmod - * - * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization - * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices - * X and B can be either dense or sparse. - * - * This variant permits to change the underlying Cholesky method at runtime. - * On the other hand, it does not provide access to the result of the factorization. - * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * - * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. 
- * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > -{ - typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base; - using Base::m_cholmod; - - public: - - typedef _MatrixType MatrixType; - - CholmodDecomposition() : Base() { init(); } - - CholmodDecomposition(const MatrixType& matrix) : Base() - { - init(); - Base::compute(matrix); - } - - ~CholmodDecomposition() {} - - void setMode(CholmodMode mode) - { - switch(mode) - { - case CholmodAuto: - m_cholmod.final_asis = 1; - m_cholmod.supernodal = CHOLMOD_AUTO; - break; - case CholmodSimplicialLLt: - m_cholmod.final_asis = 0; - m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; - m_cholmod.final_ll = 1; - break; - case CholmodSupernodalLLt: - m_cholmod.final_asis = 1; - m_cholmod.supernodal = CHOLMOD_SUPERNODAL; - break; - case CholmodLDLt: - m_cholmod.final_asis = 1; - m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; - break; - default: - break; - } - } - protected: - void init() - { - m_cholmod.final_asis = 1; - m_cholmod.supernodal = CHOLMOD_AUTO; - } -}; - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CHOLMODSUPPORT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/Assign_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/Assign_MKL.h index 7772951b9..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/Assign_MKL.h +++ 
b/thirdparty/eigen-3.2.10/Eigen/src/Core/Assign_MKL.h @@ -1,224 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() - ******************************************************************************** -*/ - -#ifndef EIGEN_ASSIGN_VML_H -#define EIGEN_ASSIGN_VML_H - -namespace Eigen { - -namespace internal { - -template struct vml_call -{ enum { IsSupported = 0 }; }; - -template -class vml_assign_traits -{ - private: - enum { - DstHasDirectAccess = Dst::Flags & DirectAccessBit, - SrcHasDirectAccess = Src::Flags & DirectAccessBit, - - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), - InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) - : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) - : int(Dst::RowsAtCompileTime), - InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) - : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) - : int(Dst::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - - MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess - && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, - MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), - VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, - LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, - MayEnableVml = MightEnableVml && LargeEnough, - MayLinearize = MayEnableVml && MightLinearize - }; - public: - enum { - Traversal = MayLinearize ? LinearVectorizedTraversal - : MayEnableVml ? 
InnerVectorizedTraversal - : DefaultTraversal - }; -}; - -template::Traversal > -struct vml_assign_impl - : assign_impl,Traversal,Unrolling,BuiltIn> -{ -}; - -template -struct vml_assign_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename Derived1::Index Index; - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) { - const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : - &(src.nestedExpression().coeffRef(0, outer)); - Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); - vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); - } - } -}; - -template -struct vml_assign_impl -{ - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); - } -}; - -// Macroses - -#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ - template \ - struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ - static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ - vml_assign_impl::run(dst, src); \ - } \ - }; - -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) 
-EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) - - -#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) -#define EIGEN_MKL_VML_MODE VML_HA -#else -#define EIGEN_MKL_VML_MODE VML_LA -#endif - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& func, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - EIGENTYPE exponent = func.m_exponent; \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ - (VMLTYPE*)dst, &vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) - -#define 
EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) - - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) - - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) -//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) - -// The vm*powx functions are not avaibale in the windows version of MKL. 
-#ifndef _WIN32 -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ASSIGN_VML_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/CMakeLists.txt index 2346fc2bb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/CMakeLists.txt @@ -1,10 +0,0 @@ -FILE(GLOB Eigen_Core_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel - ) - -ADD_SUBDIRECTORY(products) -ADD_SUBDIRECTORY(util) -ADD_SUBDIRECTORY(arch) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/CMakeLists.txt index 9f8d2e9c4..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_AltiVec_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_AltiVec_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/AltiVec COMPONENT Devel -) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/Complex.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/Complex.h index 68d9a2bff..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/Complex.h @@ -1,217 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COMPLEX_ALTIVEC_H -#define EIGEN_COMPLEX_ALTIVEC_H - -namespace Eigen { - -namespace internal { - -static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; -static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; -static Packet16uc p16uc_COMPLEX_IM = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; -static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; -static Packet16uc p16uc_COMPLEX_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8);//{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_HI = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 1));//{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; -static Packet16uc p16uc_PSET_LO = (Packet16uc) vec_mergeh((Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 2), (Packet4ui) vec_splat((Packet4ui)p16uc_FORWARD, 3));//{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 }; - -//---------- float ---------- -struct Packet2cf -{ - EIGEN_STRONG_INLINE Packet2cf() {} - EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} - Packet4f v; -}; - -template<> struct packet_traits > : default_packet_traits -{ - typedef Packet2cf type; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size = 2, - - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasDiv = 1, - HasNegate = 1, - HasAbs = 0, - HasAbs2 = 0, - HasMin = 0, - HasMax = 0, - HasSetLinear = 0 - }; -}; - -template<> struct unpacket_traits { typedef 
std::complex type; enum {size=2}; }; - -template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) -{ - Packet2cf res; - /* On AltiVec we cannot load 64-bit registers, so wa have to take care of alignment */ - if((ptrdiff_t(&from) % 16) == 0) - res.v = pload((const float *)&from); - else - res.v = ploadu((const float *)&from); - res.v = vec_perm(res.v, res.v, p16uc_PSET_HI); - return res; -} - -template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_add(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_sub(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf((Packet4f)vec_xor((Packet4ui)a.v, p4ui_CONJ_XOR)); } - -template<> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) -{ - Packet4f v1, v2; - - // Permute and multiply the real parts of a and b - v1 = vec_perm(a.v, a.v, p16uc_COMPLEX_RE); - // Get the imaginary parts of a - v2 = vec_perm(a.v, a.v, p16uc_COMPLEX_IM); - // multiply a_re * b - v1 = vec_madd(v1, b.v, p4f_ZERO); - // multiply a_im * b and get the conjugate result - v2 = vec_madd(v2, b.v, p4f_ZERO); - v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); - // permute back to a proper order - v2 = vec_perm(v2, v2, p16uc_COMPLEX_REV); - - return Packet2cf(vec_add(v1, v2)); -} - -template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_and(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_or(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(vec_xor(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) 
{ return Packet2cf(vec_and(a.v, vec_nor(b.v,b.v))); } - -template<> EIGEN_STRONG_INLINE Packet2cf pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } - -template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) -{ - return pset1(*from); -} - -template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } - -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((float *)addr, DST_CTRL(2,2,32), DST_CHAN); } - -template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) -{ - std::complex EIGEN_ALIGN16 res[2]; - pstore((float *)&res, a.v); - - return res[0]; -} - -template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) -{ - Packet4f rev_a; - rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX_REV2); - return Packet2cf(rev_a); -} - -template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) -{ - Packet4f b; - b = (Packet4f) vec_sld(a.v, a.v, 8); - b = padd(a.v, b); - return pfirst(Packet2cf(b)); -} - -template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) -{ - Packet4f b1, b2; - - b1 = (Packet4f) vec_sld(vecs[0].v, vecs[1].v, 8); - b2 = (Packet4f) vec_sld(vecs[1].v, vecs[0].v, 8); - b2 = (Packet4f) vec_sld(b2, b2, 8); - b2 = padd(b1, b2); - - return Packet2cf(b2); -} - -template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) -{ - Packet4f b; - Packet2cf prod; - b = (Packet4f) vec_sld(a.v, a.v, 8); - prod = pmul(a, Packet2cf(b)); - - return pfirst(prod); -} - -template -struct palign_impl -{ - static EIGEN_STRONG_INLINE void 
run(Packet2cf& first, const Packet2cf& second) - { - if (Offset==1) - { - first.v = vec_sld(first.v, second.v, 8); - } - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - -template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) -{ - // TODO optimize it for AltiVec - Packet2cf res = conj_helper().pmul(a,b); - Packet4f s = vec_madd(b.v, b.v, p4f_ZERO); - return Packet2cf(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV)))); -} - -template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x) -{ - return Packet2cf(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COMPLEX_ALTIVEC_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/PacketMath.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/PacketMath.h index e4089962d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -1,501 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear 
algebra. -// -// Copyright (C) 2008 Konstantinos Margaritis -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PACKET_MATH_ALTIVEC_H -#define EIGEN_PACKET_MATH_ALTIVEC_H - -namespace Eigen { - -namespace internal { - -#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD -#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 -#endif - -#ifndef EIGEN_HAS_FUSE_CJMADD -#define EIGEN_HAS_FUSE_CJMADD 1 -#endif - -// NOTE Altivec has 32 registers, but Eigen only accepts a value of 8 or 16 -#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS -#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 -#endif - -typedef __vector float Packet4f; -typedef __vector int Packet4i; -typedef __vector unsigned int Packet4ui; -typedef __vector __bool int Packet4bi; -typedef __vector short int Packet8i; -typedef __vector unsigned char Packet16uc; - -// We don't want to write the same code all the time, but we need to reuse the constants -// and it doesn't really work to declare them global, so we define macros instead - -#define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \ - Packet4f p4f_##NAME = (Packet4f) vec_splat_s32(X) - -#define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \ - Packet4i p4i_##NAME = vec_splat_s32(X) - -#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ - Packet4f p4f_##NAME = pset1(X) - -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) - -#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ - Packet4i p4i_##NAME = pset1(X) - -#define DST_CHAN 1 -#define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) - -// Define global static constants: -static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; -static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 }; -static Packet16uc p16uc_REVERSE = {12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; -static Packet16uc 
p16uc_FORWARD = vec_lvsl(0, (float*)0); -static Packet16uc p16uc_DUPLICATE = {0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7}; - -static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); -static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); -static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); -static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); - -template<> struct packet_traits : default_packet_traits -{ - typedef Packet4f type; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size=4, - - // FIXME check the Has* - HasSin = 0, - HasCos = 0, - HasLog = 0, - HasExp = 0, - HasSqrt = 0 - }; -}; -template<> struct packet_traits : default_packet_traits -{ - typedef Packet4i type; - enum { - // FIXME check the Has* - Vectorizable = 1, - AlignedOnScalar = 1, - size=4 - }; -}; - -template<> struct unpacket_traits { typedef float type; enum {size=4}; }; -template<> struct unpacket_traits { typedef int type; enum {size=4}; }; -/* -inline std::ostream & operator <<(std::ostream & s, const Packet4f & v) -{ - union { - Packet4f v; - float n[4]; - } vt; - vt.v = v; - s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; - return s; -} - -inline std::ostream & operator <<(std::ostream & s, const Packet4i & v) -{ - union { - Packet4i v; - int n[4]; - } vt; - vt.v = v; - s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; - return s; -} - -inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v) -{ - union { - Packet4ui v; - unsigned int n[4]; - } vt; - vt.v = v; - s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", " << vt.n[3]; - return s; -} - -inline std::ostream & operator <<(std::ostream & s, const Packetbi & v) -{ - union { - Packet4bi v; - unsigned int n[4]; - } vt; - vt.v = v; - s << vt.n[0] << ", " << vt.n[1] << ", " << vt.n[2] << ", 
" << vt.n[3]; - return s; -} -*/ -template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html - float EIGEN_ALIGN16 af[4]; - af[0] = from; - Packet4f vc = vec_ld(0, af); - vc = vec_splat(vc, 0); - return vc; -} - -template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { - int EIGEN_ALIGN16 ai[4]; - ai[0] = from; - Packet4i vc = vec_ld(0, ai); - vc = vec_splat(vc, 0); - return vc; -} - -template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return vec_add(pset1(a), p4f_COUNTDOWN); } -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return vec_add(pset1(a), p4i_COUNTDOWN); } - -template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return vec_sub(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return vec_sub(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return psub(p4f_ZERO, a); } -template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return psub(p4i_ZERO, a); } - -template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } -template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } - -template<> EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b,p4f_ZERO); } -/* Commented out: it's actually slower than processing it scalar - * -template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) -{ - // Detailed in: http://freevec.org/content/32bit_signed_integer_multiplication_altivec - //Set up constants, variables - Packet4i a1, b1, bswap, low_prod, high_prod, prod, prod_, v1sel; - - // Get the absolute values - a1 = 
vec_abs(a); - b1 = vec_abs(b); - - // Get the signs using xor - Packet4bi sgn = (Packet4bi) vec_cmplt(vec_xor(a, b), p4i_ZERO); - - // Do the multiplication for the asbolute values. - bswap = (Packet4i) vec_rl((Packet4ui) b1, (Packet4ui) p4i_MINUS16 ); - low_prod = vec_mulo((Packet8i) a1, (Packet8i)b1); - high_prod = vec_msum((Packet8i) a1, (Packet8i) bswap, p4i_ZERO); - high_prod = (Packet4i) vec_sl((Packet4ui) high_prod, (Packet4ui) p4i_MINUS16); - prod = vec_add( low_prod, high_prod ); - - // NOR the product and select only the negative elements according to the sign mask - prod_ = vec_nor(prod, prod); - prod_ = vec_sel(p4i_ZERO, prod_, sgn); - - // Add 1 to the result to get the negative numbers - v1sel = vec_sel(p4i_ZERO, p4i_ONE, sgn); - prod_ = vec_add(prod_, v1sel); - - // Merge the results back to the final vector. - prod = vec_sel(prod, prod_, sgn); - - return prod; -} -*/ -template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) -{ - Packet4f t, y_0, y_1, res; - - // Altivec does not offer a divide instruction, we have to do a reciprocal approximation - y_0 = vec_re(b); - - // Do one Newton-Raphson iteration to get the needed accuracy - t = vec_nmsub(y_0, b, p4f_ONE); - y_1 = vec_madd(y_0, t, y_0); - - res = vec_madd(a, y_1, p4f_ZERO); - return res; -} - -template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) -{ eigen_assert(false && "packet integer division are not supported by AltiVec"); - return pset1(0); -} - -// for some weird raisons, it has to be overloaded for packet of integers -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); } - -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE 
Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } - -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } - -// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics -template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } - -template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) { return vec_or(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); } - -template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); } -template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); } - -template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); } -template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); } - -template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } -template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } - -template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) -{ - EIGEN_DEBUG_ALIGNED_LOAD - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html - Packet16uc MSQ, LSQ; - Packet16uc mask; - MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword - LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword - mask = vec_lvsl(0, from); // create the permute mask - return (Packet4f) 
vec_perm(MSQ, LSQ, mask); // align the data - -} -template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) -{ - EIGEN_DEBUG_ALIGNED_LOAD - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html - Packet16uc MSQ, LSQ; - Packet16uc mask; - MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword - LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword - mask = vec_lvsl(0, from); // create the permute mask - return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data -} - -template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) -{ - Packet4f p; - if((ptrdiff_t(&from) % 16) == 0) p = pload(from); - else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE); -} -template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) -{ - Packet4i p; - if((ptrdiff_t(&from) % 16) == 0) p = pload(from); - else p = ploadu(from); - return vec_perm(p, p, p16uc_DUPLICATE); -} - -template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } -template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vec_st(from, 0, to); } - -template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) -{ - EIGEN_DEBUG_UNALIGNED_STORE - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html - // Warning: not thread safe! 
- Packet16uc MSQ, LSQ, edges; - Packet16uc edgeAlign, align; - - MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword - LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword - edgeAlign = vec_lvsl(0, to); // permute map to extract edges - edges=vec_perm(LSQ,MSQ,edgeAlign); // extract the edges - align = vec_lvsr( 0, to ); // permute map to misalign data - MSQ = vec_perm(edges,(Packet16uc)from,align); // misalign the data (MSQ) - LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ) - vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first - vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part -} -template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) -{ - EIGEN_DEBUG_UNALIGNED_STORE - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html - // Warning: not thread safe! - Packet16uc MSQ, LSQ, edges; - Packet16uc edgeAlign, align; - - MSQ = vec_ld(0, (unsigned char *)to); // most significant quadword - LSQ = vec_ld(15, (unsigned char *)to); // least significant quadword - edgeAlign = vec_lvsl(0, to); // permute map to extract edges - edges=vec_perm(LSQ, MSQ, edgeAlign); // extract the edges - align = vec_lvsr( 0, to ); // permute map to misalign data - MSQ = vec_perm(edges, (Packet16uc) from, align); // misalign the data (MSQ) - LSQ = vec_perm((Packet16uc) from, edges, align); // misalign the data (LSQ) - vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first - vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part -} - -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { vec_dstt(addr, DST_CTRL(2,2,32), DST_CHAN); } - -template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } -template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int 
EIGEN_ALIGN16 x[4]; vec_st(a, 0, x); return x[0]; } - -template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { return (Packet4f)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } -template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { return (Packet4i)vec_perm((Packet16uc)a,(Packet16uc)a, p16uc_REVERSE); } - -template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); } -template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); } - -template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) -{ - Packet4f b, sum; - b = (Packet4f) vec_sld(a, a, 8); - sum = vec_add(a, b); - b = (Packet4f) vec_sld(sum, sum, 4); - sum = vec_add(sum, b); - return pfirst(sum); -} - -template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) -{ - Packet4f v[4], sum[4]; - - // It's easier and faster to transpose then add as columns - // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats for explanation - // Do the transpose, first set of moves - v[0] = vec_mergeh(vecs[0], vecs[2]); - v[1] = vec_mergel(vecs[0], vecs[2]); - v[2] = vec_mergeh(vecs[1], vecs[3]); - v[3] = vec_mergel(vecs[1], vecs[3]); - // Get the resulting vectors - sum[0] = vec_mergeh(v[0], v[2]); - sum[1] = vec_mergel(v[0], v[2]); - sum[2] = vec_mergeh(v[1], v[3]); - sum[3] = vec_mergel(v[1], v[3]); - - // Now do the summation: - // Lines 0+1 - sum[0] = vec_add(sum[0], sum[1]); - // Lines 2+3 - sum[1] = vec_add(sum[2], sum[3]); - // Add the results - sum[0] = vec_add(sum[0], sum[1]); - - return sum[0]; -} - -template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) -{ - Packet4i sum; - sum = vec_sums(a, p4i_ZERO); - sum = vec_sld(sum, p4i_ZERO, 12); - return pfirst(sum); -} - -template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) -{ - Packet4i v[4], sum[4]; - - // It's easier and faster to transpose then add as columns - // Check: http://www.freevec.org/function/matrix_4x4_transpose_floats 
for explanation - // Do the transpose, first set of moves - v[0] = vec_mergeh(vecs[0], vecs[2]); - v[1] = vec_mergel(vecs[0], vecs[2]); - v[2] = vec_mergeh(vecs[1], vecs[3]); - v[3] = vec_mergel(vecs[1], vecs[3]); - // Get the resulting vectors - sum[0] = vec_mergeh(v[0], v[2]); - sum[1] = vec_mergel(v[0], v[2]); - sum[2] = vec_mergeh(v[1], v[3]); - sum[3] = vec_mergel(v[1], v[3]); - - // Now do the summation: - // Lines 0+1 - sum[0] = vec_add(sum[0], sum[1]); - // Lines 2+3 - sum[1] = vec_add(sum[2], sum[3]); - // Add the results - sum[0] = vec_add(sum[0], sum[1]); - - return sum[0]; -} - -// Other reduction functions: -// mul -template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) -{ - Packet4f prod; - prod = pmul(a, (Packet4f)vec_sld(a, a, 8)); - return pfirst(pmul(prod, (Packet4f)vec_sld(prod, prod, 4))); -} - -template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) -{ - EIGEN_ALIGN16 int aux[4]; - pstore(aux, a); - return aux[0] * aux[1] * aux[2] * aux[3]; -} - -// min -template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) -{ - Packet4f b, res; - b = vec_min(a, vec_sld(a, a, 8)); - res = vec_min(b, vec_sld(b, b, 4)); - return pfirst(res); -} - -template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) -{ - Packet4i b, res; - b = vec_min(a, vec_sld(a, a, 8)); - res = vec_min(b, vec_sld(b, b, 4)); - return pfirst(res); -} - -// max -template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) -{ - Packet4f b, res; - b = vec_max(a, vec_sld(a, a, 8)); - res = vec_max(b, vec_sld(b, b, 4)); - return pfirst(res); -} - -template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) -{ - Packet4i b, res; - b = vec_max(a, vec_sld(a, a, 8)); - res = vec_max(b, vec_sld(b, b, 4)); - return pfirst(res); -} - -template -struct palign_impl -{ - static EIGEN_STRONG_INLINE void run(Packet4f& first, const Packet4f& second) - { - if (Offset!=0) - first = vec_sld(first, second, Offset*4); - } -}; - -template -struct palign_impl -{ - 
static EIGEN_STRONG_INLINE void run(Packet4i& first, const Packet4i& second) - { - if (Offset!=0) - first = vec_sld(first, second, Offset*4); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_PACKET_MATH_ALTIVEC_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/CMakeLists.txt index 8456dec15..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/CMakeLists.txt @@ -1,4 +0,0 @@ -ADD_SUBDIRECTORY(SSE) -ADD_SUBDIRECTORY(AltiVec) -ADD_SUBDIRECTORY(NEON) -ADD_SUBDIRECTORY(Default) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/Default/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/Default/CMakeLists.txt index 339c091d1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/Default/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/Default/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_Default_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_Default_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/Default COMPONENT Devel -) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/CMakeLists.txt index fd4d4af50..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_NEON_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_NEON_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/NEON COMPONENT Devel -) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/Complex.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/Complex.h index 8d9255eef..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/Complex.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/Complex.h @@ 
-1,253 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COMPLEX_NEON_H -#define EIGEN_COMPLEX_NEON_H - -namespace Eigen { - -namespace internal { - -static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000); -static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000); - -//---------- float ---------- -struct Packet2cf -{ - EIGEN_STRONG_INLINE Packet2cf() {} - EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {} - Packet4f v; -}; - -template<> struct packet_traits > : default_packet_traits -{ - typedef Packet2cf type; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size = 2, - - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasDiv = 1, - HasNegate = 1, - HasAbs = 0, - HasAbs2 = 0, - HasMin = 0, - HasMax = 0, - HasSetLinear = 0 - }; -}; - -template<> struct unpacket_traits { typedef std::complex type; enum {size=2}; }; - -template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) -{ - float32x2_t r64; - r64 = vld1_f32((float *)&from); - - return Packet2cf(vcombine_f32(r64, r64)); -} - -template<> EIGEN_STRONG_INLINE Packet2cf padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate(a.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) -{ - Packet4ui b = vreinterpretq_u32_f32(a.v); - return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR))); -} - -template<> EIGEN_STRONG_INLINE 
Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) -{ - Packet4f v1, v2; - - // Get the real values of a | a1_re | a1_re | a2_re | a2_re | - v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0)); - // Get the real values of a | a1_im | a1_im | a2_im | a2_im | - v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1)); - // Multiply the real a with b - v1 = vmulq_f32(v1, b.v); - // Multiply the imag a with b - v2 = vmulq_f32(v2, b.v); - // Conjugate v2 - v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR)); - // Swap real/imag elements in v2. - v2 = vrev64q_f32(v2); - // Add and return the result - return Packet2cf(vaddq_f32(v1, v2)); -} - -template<> EIGEN_STRONG_INLINE Packet2cf pand (const Packet2cf& a, const Packet2cf& b) -{ - return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); -} -template<> EIGEN_STRONG_INLINE Packet2cf por (const Packet2cf& a, const Packet2cf& b) -{ - return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); -} -template<> EIGEN_STRONG_INLINE Packet2cf pxor (const Packet2cf& a, const Packet2cf& b) -{ - return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); -} -template<> EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packet2cf& b) -{ - return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v)))); -} - -template<> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu((const float*)from)); } - -template<> EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* from) { return pset1(*from); } - -template<> 
EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } - -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ARM_PREFETCH((float *)addr); } - -template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) -{ - std::complex EIGEN_ALIGN16 x[2]; - vst1q_f32((float *)x, a.v); - return x[0]; -} - -template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) -{ - float32x2_t a_lo, a_hi; - Packet4f a_r128; - - a_lo = vget_low_f32(a.v); - a_hi = vget_high_f32(a.v); - a_r128 = vcombine_f32(a_hi, a_lo); - - return Packet2cf(a_r128); -} - -template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& a) -{ - return Packet2cf(vrev64q_f32(a.v)); -} - -template<> EIGEN_STRONG_INLINE std::complex predux(const Packet2cf& a) -{ - float32x2_t a1, a2; - std::complex s; - - a1 = vget_low_f32(a.v); - a2 = vget_high_f32(a.v); - a2 = vadd_f32(a1, a2); - vst1_f32((float *)&s, a2); - - return s; -} - -template<> EIGEN_STRONG_INLINE Packet2cf preduxp(const Packet2cf* vecs) -{ - Packet4f sum1, sum2, sum; - - // Add the first two 64-bit float32x2_t of vecs[0] - sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v)); - sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v)); - sum = vaddq_f32(sum1, sum2); - - return Packet2cf(sum); -} - -template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet2cf& a) -{ - float32x2_t a1, a2, v1, v2, prod; - std::complex s; - - a1 = vget_low_f32(a.v); - a2 = vget_high_f32(a.v); - // Get the real values of a | a1_re | a1_re | a2_re | a2_re | - v1 = vdup_lane_f32(a1, 0); - // Get the real values of a | a1_im | a1_im | a2_im | a2_im | - v2 = vdup_lane_f32(a1, 1); - // Multiply the real a with b - v1 = vmul_f32(v1, a2); - // Multiply the imag a 
with b - v2 = vmul_f32(v2, a2); - // Conjugate v2 - v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR)); - // Swap real/imag elements in v2. - v2 = vrev64_f32(v2); - // Add v1, v2 - prod = vadd_f32(v1, v2); - - vst1_f32((float *)&s, prod); - - return s; -} - -template -struct palign_impl -{ - EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second) - { - if (Offset==1) - { - first.v = vextq_f32(first.v, second.v, 2); - } - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(a, pconj(b)); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return internal::pmul(pconj(a), b); - } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(pmul(x,y),c); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const - { - return pconj(internal::pmul(a, b)); - } -}; - -template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) -{ - // TODO optimize it for AltiVec - Packet2cf res = conj_helper().pmul(a,b); - Packet4f s, rev_s; - - // this computes the norm - s = vmulq_f32(b.v, b.v); - rev_s = vrev64q_f32(s); - - return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COMPLEX_NEON_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/PacketMath.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/PacketMath.h index d49670e04..e69de29bb 100644 
--- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/NEON/PacketMath.h @@ -1,420 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// Copyright (C) 2010 Konstantinos Margaritis -// Heavily based on Gael's SSE version. -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PACKET_MATH_NEON_H -#define EIGEN_PACKET_MATH_NEON_H - -namespace Eigen { - -namespace internal { - -#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD -#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 -#endif - -// FIXME NEON has 16 quad registers, but since the current register allocator -// is so bad, it is much better to reduce it to 8 -#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS -#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 -#endif - -typedef float32x4_t Packet4f; -typedef int32x4_t Packet4i; -typedef uint32x4_t Packet4ui; - -#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ - const Packet4f p4f_##NAME = pset1(X) - -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - const Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) - -#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ - const Packet4i p4i_##NAME = pset1(X) - -#if defined(__llvm__) && !defined(__clang__) - //Special treatment for Apple's llvm-gcc, its NEON packet types are unions - #define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}} -#else - //Default initializer for packets - #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} - #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} -#endif - -// arm64 does have the pld instruction. 
If available, let's trust the __builtin_prefetch built-in function -// which available on LLVM and GCC (at least) -#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__) - #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); -#elif defined __pld - #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) -#elif !defined(__aarch64__) - #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); -#else - // by default no explicit prefetching - #define EIGEN_ARM_PREFETCH(ADDR) -#endif - -template<> struct packet_traits : default_packet_traits -{ - typedef Packet4f type; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size = 4, - - HasDiv = 1, - // FIXME check the Has* - HasSin = 0, - HasCos = 0, - HasLog = 0, - HasExp = 0, - HasSqrt = 0 - }; -}; -template<> struct packet_traits : default_packet_traits -{ - typedef Packet4i type; - enum { - Vectorizable = 1, - AlignedOnScalar = 1, - size=4 - // FIXME check the Has* - }; -}; - -#if EIGEN_GNUC_AT_MOST(4,4) && !defined(__llvm__) -// workaround gcc 4.2, 4.3 and 4.4 compilatin issue -EIGEN_STRONG_INLINE float32x4_t vld1q_f32(const float* x) { return ::vld1q_f32((const float32_t*)x); } -EIGEN_STRONG_INLINE float32x2_t vld1_f32 (const float* x) { return ::vld1_f32 ((const float32_t*)x); } -EIGEN_STRONG_INLINE void vst1q_f32(float* to, float32x4_t from) { ::vst1q_f32((float32_t*)to,from); } -EIGEN_STRONG_INLINE void vst1_f32 (float* to, float32x2_t from) { ::vst1_f32 ((float32_t*)to,from); } -#endif - -template<> struct unpacket_traits { typedef float type; enum {size=4}; }; -template<> struct unpacket_traits { typedef int type; enum {size=4}; }; - -template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vdupq_n_s32(from); } - -template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) -{ - Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); - return 
vaddq_f32(pset1(a), countdown); -} -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) -{ - Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); - return vaddq_s32(pset1(a), countdown); -} - -template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return vaddq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return vaddq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f psub(const Packet4f& a, const Packet4f& b) { return vsubq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i psub(const Packet4i& a, const Packet4i& b) { return vsubq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return vnegq_f32(a); } -template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return vnegq_s32(a); } - -template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } -template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } - -template<> EIGEN_STRONG_INLINE Packet4f pmul(const Packet4f& a, const Packet4f& b) { return vmulq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pmul(const Packet4i& a, const Packet4i& b) { return vmulq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) -{ - Packet4f inv, restep, div; - - // NEON does not offer a divide instruction, we have to do a reciprocal approximation - // However NEON in contrast to other SIMD engines (AltiVec/SSE), offers - // a reciprocal estimate AND a reciprocal step -which saves a few instructions - // vrecpeq_f32() returns an estimate to 1/b, which we will finetune with - // Newton-Raphson and vrecpsq_f32() - inv = vrecpeq_f32(b); - - // This returns a differential, by which we will have to multiply inv to get a better - // approximation of 1/b. - restep = vrecpsq_f32(b, inv); - inv = vmulq_f32(restep, inv); - - // Finally, multiply a by 1/b and get the wanted result of the division. 
- div = vmulq_f32(a, inv); - - return div; -} -template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, const Packet4i& /*b*/) -{ eigen_assert(false && "packet integer division are not supported by NEON"); - return pset1(0); -} - -// for some weird raisons, it has to be overloaded for packet of integers -template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vmlaq_f32(c,a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return vmlaq_s32(c,a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vminq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vmaxq_s32(a,b); } - -// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics -template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) -{ - return vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); -} -template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return vandq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f por(const Packet4f& a, const Packet4f& b) -{ - return vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); -} -template<> EIGEN_STRONG_INLINE Packet4i por(const Packet4i& a, const Packet4i& b) { return vorrq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pxor(const Packet4f& a, const Packet4f& b) -{ - return vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); -} -template<> EIGEN_STRONG_INLINE Packet4i pxor(const Packet4i& a, const Packet4i& b) { 
return veorq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f& b) -{ - return vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a),vreinterpretq_u32_f32(b))); -} -template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return vbicq_s32(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vld1q_s32(from); } - -template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_f32(from); } -template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return vld1q_s32(from); } - -template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) -{ - float32x2_t lo, hi; - lo = vld1_dup_f32(from); - hi = vld1_dup_f32(from+1); - return vcombine_f32(lo, hi); -} -template<> EIGEN_STRONG_INLINE Packet4i ploaddup(const int* from) -{ - int32x2_t lo, hi; - lo = vld1_dup_s32(from); - hi = vld1_dup_s32(from+1); - return vcombine_s32(lo, hi); -} - -template<> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { EIGEN_DEBUG_ALIGNED_STORE vst1q_s32(to, from); } - -template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } -template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } - -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { EIGEN_ARM_PREFETCH(addr); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { EIGEN_ARM_PREFETCH(addr); } - -// FIXME only store the 2 first elements ? 
-template<> EIGEN_STRONG_INLINE float pfirst(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } -template<> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; vst1q_s32(x, a); return x[0]; } - -template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) { - float32x2_t a_lo, a_hi; - Packet4f a_r64; - - a_r64 = vrev64q_f32(a); - a_lo = vget_low_f32(a_r64); - a_hi = vget_high_f32(a_r64); - return vcombine_f32(a_hi, a_lo); -} -template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) { - int32x2_t a_lo, a_hi; - Packet4i a_r64; - - a_r64 = vrev64q_s32(a); - a_lo = vget_low_s32(a_r64); - a_hi = vget_high_s32(a_r64); - return vcombine_s32(a_hi, a_lo); -} -template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vabsq_f32(a); } -template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vabsq_s32(a); } - -template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) -{ - float32x2_t a_lo, a_hi, sum; - - a_lo = vget_low_f32(a); - a_hi = vget_high_f32(a); - sum = vpadd_f32(a_lo, a_hi); - sum = vpadd_f32(sum, sum); - return vget_lane_f32(sum, 0); -} - -template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) -{ - float32x4x2_t vtrn1, vtrn2, res1, res2; - Packet4f sum1, sum2, sum; - - // NEON zip performs interleaving of the supplied vectors. 
- // We perform two interleaves in a row to acquire the transposed vector - vtrn1 = vzipq_f32(vecs[0], vecs[2]); - vtrn2 = vzipq_f32(vecs[1], vecs[3]); - res1 = vzipq_f32(vtrn1.val[0], vtrn2.val[0]); - res2 = vzipq_f32(vtrn1.val[1], vtrn2.val[1]); - - // Do the addition of the resulting vectors - sum1 = vaddq_f32(res1.val[0], res1.val[1]); - sum2 = vaddq_f32(res2.val[0], res2.val[1]); - sum = vaddq_f32(sum1, sum2); - - return sum; -} - -template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) -{ - int32x2_t a_lo, a_hi, sum; - - a_lo = vget_low_s32(a); - a_hi = vget_high_s32(a); - sum = vpadd_s32(a_lo, a_hi); - sum = vpadd_s32(sum, sum); - return vget_lane_s32(sum, 0); -} - -template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) -{ - int32x4x2_t vtrn1, vtrn2, res1, res2; - Packet4i sum1, sum2, sum; - - // NEON zip performs interleaving of the supplied vectors. - // We perform two interleaves in a row to acquire the transposed vector - vtrn1 = vzipq_s32(vecs[0], vecs[2]); - vtrn2 = vzipq_s32(vecs[1], vecs[3]); - res1 = vzipq_s32(vtrn1.val[0], vtrn2.val[0]); - res2 = vzipq_s32(vtrn1.val[1], vtrn2.val[1]); - - // Do the addition of the resulting vectors - sum1 = vaddq_s32(res1.val[0], res1.val[1]); - sum2 = vaddq_s32(res2.val[0], res2.val[1]); - sum = vaddq_s32(sum1, sum2); - - return sum; -} - -// Other reduction functions: -// mul -template<> EIGEN_STRONG_INLINE float predux_mul(const Packet4f& a) -{ - float32x2_t a_lo, a_hi, prod; - - // Get a_lo = |a1|a2| and a_hi = |a3|a4| - a_lo = vget_low_f32(a); - a_hi = vget_high_f32(a); - // Get the product of a_lo * a_hi -> |a1*a3|a2*a4| - prod = vmul_f32(a_lo, a_hi); - // Multiply prod with its swapped value |a2*a4|a1*a3| - prod = vmul_f32(prod, vrev64_f32(prod)); - - return vget_lane_f32(prod, 0); -} -template<> EIGEN_STRONG_INLINE int predux_mul(const Packet4i& a) -{ - int32x2_t a_lo, a_hi, prod; - - // Get a_lo = |a1|a2| and a_hi = |a3|a4| - a_lo = vget_low_s32(a); - a_hi = vget_high_s32(a); - // Get 
the product of a_lo * a_hi -> |a1*a3|a2*a4| - prod = vmul_s32(a_lo, a_hi); - // Multiply prod with its swapped value |a2*a4|a1*a3| - prod = vmul_s32(prod, vrev64_s32(prod)); - - return vget_lane_s32(prod, 0); -} - -// min -template<> EIGEN_STRONG_INLINE float predux_min(const Packet4f& a) -{ - float32x2_t a_lo, a_hi, min; - - a_lo = vget_low_f32(a); - a_hi = vget_high_f32(a); - min = vpmin_f32(a_lo, a_hi); - min = vpmin_f32(min, min); - - return vget_lane_f32(min, 0); -} - -template<> EIGEN_STRONG_INLINE int predux_min(const Packet4i& a) -{ - int32x2_t a_lo, a_hi, min; - - a_lo = vget_low_s32(a); - a_hi = vget_high_s32(a); - min = vpmin_s32(a_lo, a_hi); - min = vpmin_s32(min, min); - - return vget_lane_s32(min, 0); -} - -// max -template<> EIGEN_STRONG_INLINE float predux_max(const Packet4f& a) -{ - float32x2_t a_lo, a_hi, max; - - a_lo = vget_low_f32(a); - a_hi = vget_high_f32(a); - max = vpmax_f32(a_lo, a_hi); - max = vpmax_f32(max, max); - - return vget_lane_f32(max, 0); -} - -template<> EIGEN_STRONG_INLINE int predux_max(const Packet4i& a) -{ - int32x2_t a_lo, a_hi, max; - - a_lo = vget_low_s32(a); - a_hi = vget_high_s32(a); - max = vpmax_s32(a_lo, a_hi); - max = vpmax_s32(max, max); - - return vget_lane_s32(max, 0); -} - -// this PALIGN_NEON business is to work around a bug in LLVM Clang 3.0 causing incorrect compilation errors, -// see bug 347 and this LLVM bug: http://llvm.org/bugs/show_bug.cgi?id=11074 -#define PALIGN_NEON(Offset,Type,Command) \ -template<>\ -struct palign_impl\ -{\ - EIGEN_STRONG_INLINE static void run(Type& first, const Type& second)\ - {\ - if (Offset!=0)\ - first = Command(first, second, Offset);\ - }\ -};\ - -PALIGN_NEON(0,Packet4f,vextq_f32) -PALIGN_NEON(1,Packet4f,vextq_f32) -PALIGN_NEON(2,Packet4f,vextq_f32) -PALIGN_NEON(3,Packet4f,vextq_f32) -PALIGN_NEON(0,Packet4i,vextq_s32) -PALIGN_NEON(1,Packet4i,vextq_s32) -PALIGN_NEON(2,Packet4i,vextq_s32) -PALIGN_NEON(3,Packet4i,vextq_s32) - -#undef PALIGN_NEON - -} // end namespace internal 
- -} // end namespace Eigen - -#endif // EIGEN_PACKET_MATH_NEON_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/SSE/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/SSE/CMakeLists.txt index 46ea7cc62..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/SSE/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/arch/SSE/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_arch_SSE_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_arch_SSE_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/arch/SSE COMPONENT Devel -) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/CMakeLists.txt index 21fc94ae3..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_Product_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_Product_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/products COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h index 3deed068e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_MKL.h @@ -1,146 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Level 3 BLAS SYRK/HERK implementation. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H -#define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H - -namespace Eigen { - -namespace internal { - -template -struct general_matrix_matrix_rankupdate : - general_matrix_matrix_triangular_product< - Index,Scalar,AStorageOrder,ConjugateA,Scalar,AStorageOrder,ConjugateA,ResStorageOrder,UpLo,BuiltIn> {}; - - -// try to go to BLAS specialization -#define EIGEN_MKL_RANKUPDATE_SPECIALIZE(Scalar) \ -template \ -struct general_matrix_matrix_triangular_product { \ - static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha) \ - { \ - if (lhs==rhs) { \ - general_matrix_matrix_rankupdate \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ - } else { \ - general_matrix_matrix_triangular_product \ - ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha); \ - } \ - } \ -}; - -EIGEN_MKL_RANKUPDATE_SPECIALIZE(double) -//EIGEN_MKL_RANKUPDATE_SPECIALIZE(dcomplex) -EIGEN_MKL_RANKUPDATE_SPECIALIZE(float) -//EIGEN_MKL_RANKUPDATE_SPECIALIZE(scomplex) - -// SYRK for float/double -#define EIGEN_MKL_RANKUPDATE_R(EIGTYPE, MKLTYPE, MKLFUNC) \ -template \ -struct general_matrix_matrix_rankupdate { \ - enum { \ - IsLower = (UpLo&Lower) == Lower, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = ((AStorageOrder==ColMajor) && ConjugateA) ? 1 : 0 \ - }; \ - static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ - { \ - /* typedef Matrix MatrixRhs;*/ \ -\ - MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ - char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 
'T':'N'; \ - MKLTYPE alpha_, beta_; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ - MKLFUNC(&uplo, &trans, &n, &k, &alpha_, lhs, &lda, &beta_, res, &ldc); \ - } \ -}; - -// HERK for complex data -#define EIGEN_MKL_RANKUPDATE_C(EIGTYPE, MKLTYPE, RTYPE, MKLFUNC) \ -template \ -struct general_matrix_matrix_rankupdate { \ - enum { \ - IsLower = (UpLo&Lower) == Lower, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = (((AStorageOrder==ColMajor) && ConjugateA) || ((AStorageOrder==RowMajor) && !ConjugateA)) ? 1 : 0 \ - }; \ - static EIGEN_STRONG_INLINE void run(Index size, Index depth,const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsStride, EIGTYPE* res, Index resStride, EIGTYPE alpha) \ - { \ - typedef Matrix MatrixType; \ -\ - MKL_INT lda=lhsStride, ldc=resStride, n=size, k=depth; \ - char uplo=(IsLower) ? 'L' : 'U', trans=(AStorageOrder==RowMajor) ? 'C':'N'; \ - RTYPE alpha_, beta_; \ - const EIGTYPE* a_ptr; \ -\ -/* Set alpha_ & beta_ */ \ -/* assign_scalar_eig2mkl(alpha_, alpha); */\ -/* assign_scalar_eig2mkl(beta_, EIGTYPE(1));*/ \ - alpha_ = alpha.real(); \ - beta_ = 1.0; \ -/* Copy with conjugation in some cases*/ \ - MatrixType a; \ - if (conjA) { \ - Map > mapA(lhs,n,k,OuterStride<>(lhsStride)); \ - a = mapA.conjugate(); \ - lda = a.outerStride(); \ - a_ptr = a.data(); \ - } else a_ptr=lhs; \ - MKLFUNC(&uplo, &trans, &n, &k, &alpha_, (MKLTYPE*)a_ptr, &lda, &beta_, (MKLTYPE*)res, &ldc); \ - } \ -}; - - -EIGEN_MKL_RANKUPDATE_R(double, double, dsyrk) -EIGEN_MKL_RANKUPDATE_R(float, float, ssyrk) - -//EIGEN_MKL_RANKUPDATE_C(dcomplex, MKL_Complex16, double, zherk) -//EIGEN_MKL_RANKUPDATE_C(scomplex, MKL_Complex8, double, cherk) - - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h 
b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h index 060af328e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixMatrix_MKL.h @@ -1,118 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * General matrix-matrix product functionality based on ?GEMM. - ******************************************************************************** -*/ - -#ifndef EIGEN_GENERAL_MATRIX_MATRIX_MKL_H -#define EIGEN_GENERAL_MATRIX_MATRIX_MKL_H - -namespace Eigen { - -namespace internal { - -/********************************************************************** -* This file implements general matrix-matrix multiplication using BLAS -* gemm function via partial specialization of -* general_matrix_matrix_product::run(..) method for float, double, -* std::complex and std::complex types -**********************************************************************/ - -// gemm specialization - -#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, MKLTYPE, MKLPREFIX) \ -template< \ - typename Index, \ - int LhsStorageOrder, bool ConjugateLhs, \ - int RhsStorageOrder, bool ConjugateRhs> \ -struct general_matrix_matrix_product \ -{ \ -static void run(Index rows, Index cols, Index depth, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha, \ - level3_blocking& /*blocking*/, \ - GemmParallelInfo* /*info = 0*/) \ -{ \ - using std::conj; \ -\ - char transa, transb; \ - MKL_INT m, n, k, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX a_tmp, b_tmp; \ - EIGTYPE myone(1);\ -\ -/* Set transpose options */ \ - transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ - transb = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 
'C' : 'T') : 'N'; \ -\ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ - k = (MKL_INT)depth; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if ((LhsStorageOrder==ColMajor) && (ConjugateLhs)) { \ - Map > lhs(_lhs,m,k,OuterStride<>(lhsStride)); \ - a_tmp = lhs.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else a = _lhs; \ -\ - if ((RhsStorageOrder==ColMajor) && (ConjugateRhs)) { \ - Map > rhs(_rhs,k,n,OuterStride<>(rhsStride)); \ - b_tmp = rhs.conjugate(); \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } else b = _rhs; \ -\ - MKLPREFIX##gemm(&transa, &transb, &m, &n, &k, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -}}; - -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, z) -GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, c) - -} // end namespase internal - -} // end namespace Eigen - -#endif // EIGEN_GENERAL_MATRIX_MATRIX_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixVector_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixVector_MKL.h index 1cb9fe6b5..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixVector_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/GeneralMatrixVector_MKL.h @@ -1,131 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * General matrix-vector product functionality based on ?GEMV. - ******************************************************************************** -*/ - -#ifndef EIGEN_GENERAL_MATRIX_VECTOR_MKL_H -#define EIGEN_GENERAL_MATRIX_VECTOR_MKL_H - -namespace Eigen { - -namespace internal { - -/********************************************************************** -* This file implements general matrix-vector multiplication using BLAS -* gemv function via partial specialization of -* general_matrix_vector_product::run(..) 
method for float, double, -* std::complex and std::complex types -**********************************************************************/ - -// gemv specialization - -template -struct general_matrix_vector_product_gemv : - general_matrix_vector_product {}; - -#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \ -template \ -struct general_matrix_vector_product { \ -static void run( \ - Index rows, Index cols, \ - const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsIncr, \ - Scalar* res, Index resIncr, Scalar alpha) \ -{ \ - if (ConjugateLhs) { \ - general_matrix_vector_product::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ - } else { \ - general_matrix_vector_product_gemv::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ - } \ -} \ -}; \ -template \ -struct general_matrix_vector_product { \ -static void run( \ - Index rows, Index cols, \ - const Scalar* lhs, Index lhsStride, \ - const Scalar* rhs, Index rhsIncr, \ - Scalar* res, Index resIncr, Scalar alpha) \ -{ \ - general_matrix_vector_product_gemv::run( \ - rows, cols, lhs, lhsStride, rhs, rhsIncr, res, resIncr, alpha); \ -} \ -}; \ - -EIGEN_MKL_GEMV_SPECIALIZE(double) -EIGEN_MKL_GEMV_SPECIALIZE(float) -EIGEN_MKL_GEMV_SPECIALIZE(dcomplex) -EIGEN_MKL_GEMV_SPECIALIZE(scomplex) - -#define EIGEN_MKL_GEMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLPREFIX) \ -template \ -struct general_matrix_vector_product_gemv \ -{ \ -typedef Matrix GEMVVector;\ -\ -static void run( \ - Index rows, Index cols, \ - const EIGTYPE* lhs, Index lhsStride, \ - const EIGTYPE* rhs, Index rhsIncr, \ - EIGTYPE* res, Index resIncr, EIGTYPE alpha) \ -{ \ - MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \ - MKLTYPE alpha_, beta_; \ - const EIGTYPE *x_ptr, myone(1); \ - char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 
'C' : 'T'; \ - if (LhsStorageOrder==RowMajor) { \ - m=cols; \ - n=rows; \ - }\ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ - GEMVVector x_tmp; \ - if (ConjugateRhs) { \ - Map > map_x(rhs,cols,1,InnerStride<>(incx)); \ - x_tmp=map_x.conjugate(); \ - x_ptr=x_tmp.data(); \ - incx=1; \ - } else x_ptr=rhs; \ - MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ -}\ -}; - -EIGEN_MKL_GEMV_SPECIALIZATION(double, double, d) -EIGEN_MKL_GEMV_SPECIALIZATION(float, float, s) -EIGEN_MKL_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, z) -EIGEN_MKL_GEMV_SPECIALIZATION(scomplex, MKL_Complex8, c) - -} // end namespase internal - -} // end namespace Eigen - -#endif // EIGEN_GENERAL_MATRIX_VECTOR_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h index dfa687fef..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixMatrix_MKL.h @@ -1,295 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM. - ******************************************************************************** -*/ - -#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H -#define EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H - -namespace Eigen { - -namespace internal { - - -/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ - -#define EIGEN_MKL_SYMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ -\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='L', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - EIGTYPE myone(1);\ -\ -/* Set transpose options */ \ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set 
lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (LhsStorageOrder==RowMajor) uplo='U'; \ - a = _lhs; \ -\ - if (RhsStorageOrder==RowMajor) { \ - Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = rhs.adjoint(); \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } else b = _rhs; \ -\ - MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -\ - } \ -}; - - -#define EIGEN_MKL_HEMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='L', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - Matrix a_tmp; \ - EIGTYPE myone(1); \ -\ -/* Set transpose options */ \ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)lhsStride; \ - ldb = (MKL_INT)rhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \ - Map, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \ - a_tmp = lhs.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else a = _lhs; \ - if (LhsStorageOrder==RowMajor) uplo='U'; \ -\ - if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \ - b = _rhs; } \ - else { \ - if (RhsStorageOrder==ColMajor && ConjugateRhs) { \ - Map > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \ - b_tmp = rhs.conjugate(); \ - } else \ - if (ConjugateRhs) { \ - Map > 
rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = rhs.adjoint(); \ - } else { \ - Map > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = rhs.transpose(); \ - } \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } \ -\ - MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -\ - } \ -}; - -EIGEN_MKL_SYMM_L(double, double, d, d) -EIGEN_MKL_SYMM_L(float, float, f, s) -EIGEN_MKL_HEMM_L(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_HEMM_L(scomplex, MKL_Complex8, cf, c) - - -/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ - -#define EIGEN_MKL_SYMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ -\ - static void run( \ - Index rows, Index cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='R', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - EIGTYPE myone(1);\ -\ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)rhsStride; \ - ldb = (MKL_INT)lhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (RhsStorageOrder==RowMajor) uplo='U'; \ - a = _rhs; \ -\ - if (LhsStorageOrder==RowMajor) { \ - Map > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \ - b_tmp = lhs.adjoint(); \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } else b = _lhs; \ -\ - MKLPREFIX##symm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ -\ - } \ -}; - - -#define EIGEN_MKL_HEMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_selfadjoint_matrix \ -{\ - static void run( \ - Index rows, Index 
cols, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha) \ - { \ - char side='R', uplo='L'; \ - MKL_INT m, n, lda, ldb, ldc; \ - const EIGTYPE *a, *b; \ - MKLTYPE alpha_, beta_; \ - MatrixX##EIGPREFIX b_tmp; \ - Matrix a_tmp; \ - EIGTYPE myone(1); \ -\ -/* Set m, n, k */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)cols; \ -\ -/* Set alpha_ & beta_ */ \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ -\ -/* Set lda, ldb, ldc */ \ - lda = (MKL_INT)rhsStride; \ - ldb = (MKL_INT)lhsStride; \ - ldc = (MKL_INT)resStride; \ -\ -/* Set a, b, c */ \ - if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \ - Map, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \ - a_tmp = rhs.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else a = _rhs; \ - if (RhsStorageOrder==RowMajor) uplo='U'; \ -\ - if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \ - b = _lhs; } \ - else { \ - if (LhsStorageOrder==ColMajor && ConjugateLhs) { \ - Map > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \ - b_tmp = lhs.conjugate(); \ - } else \ - if (ConjugateLhs) { \ - Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ - b_tmp = lhs.adjoint(); \ - } else { \ - Map > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \ - b_tmp = lhs.transpose(); \ - } \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ - } \ -\ - MKLPREFIX##hemm(&side, &uplo, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)b, &ldb, &beta_, (MKLTYPE*)res, &ldc); \ - } \ -}; - -EIGEN_MKL_SYMM_R(double, double, d, d) -EIGEN_MKL_SYMM_R(float, float, f, s) -EIGEN_MKL_HEMM_R(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_HEMM_R(scomplex, MKL_Complex8, cf, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_MKL_H diff --git 
a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h index 86684b66d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/SelfadjointMatrixVector_MKL.h @@ -1,114 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV. - ******************************************************************************** -*/ - -#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H -#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H - -namespace Eigen { - -namespace internal { - -/********************************************************************** -* This file implements selfadjoint matrix-vector multiplication using BLAS -**********************************************************************/ - -// symv/hemv specialization - -template -struct selfadjoint_matrix_vector_product_symv : - selfadjoint_matrix_vector_product {}; - -#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \ -template \ -struct selfadjoint_matrix_vector_product { \ -static void run( \ - Index size, const Scalar* lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \ - enum {\ - IsColMajor = StorageOrder==ColMajor \ - }; \ - if (IsColMajor == ConjugateLhs) {\ - selfadjoint_matrix_vector_product::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ - } else {\ - selfadjoint_matrix_vector_product_symv::run( \ - size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \ - }\ - } \ -}; \ - -EIGEN_MKL_SYMV_SPECIALIZE(double) -EIGEN_MKL_SYMV_SPECIALIZE(float) -EIGEN_MKL_SYMV_SPECIALIZE(dcomplex) -EIGEN_MKL_SYMV_SPECIALIZE(scomplex) - -#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \ -template \ -struct selfadjoint_matrix_vector_product_symv \ -{ \ -typedef Matrix SYMVVector;\ -\ -static void run( \ -Index size, const EIGTYPE* lhs, Index lhsStride, \ -const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \ -{ \ - enum {\ - IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \ - IsLower = UpLo == Lower ? 
1 : 0 \ - }; \ - MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \ - MKLTYPE alpha_, beta_; \ - const EIGTYPE *x_ptr, myone(1); \ - char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, myone); \ - SYMVVector x_tmp; \ - if (ConjugateRhs) { \ - Map > map_x(_rhs,size,1,InnerStride<>(incx)); \ - x_tmp=map_x.conjugate(); \ - x_ptr=x_tmp.data(); \ - incx=1; \ - } else x_ptr=_rhs; \ - MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \ -}\ -}; - -EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv) -EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv) -EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) -EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h index 4cc56a42f..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixMatrix_MKL.h @@ -1,309 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Triangular matrix * matrix product functionality based on ?TRMM. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H -#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H - -namespace Eigen { - -namespace internal { - - -template -struct product_triangular_matrix_matrix_trmm : - product_triangular_matrix_matrix {}; - - -// try to go to BLAS specialization -#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \ -template \ -struct product_triangular_matrix_matrix { \ - static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\ - const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking& blocking) { \ - product_triangular_matrix_matrix_trmm::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ - } \ -}; - -EIGEN_MKL_TRMM_SPECIALIZE(double, true) -EIGEN_MKL_TRMM_SPECIALIZE(double, false) -EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true) -EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false) -EIGEN_MKL_TRMM_SPECIALIZE(float, true) -EIGEN_MKL_TRMM_SPECIALIZE(float, false) -EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true) -EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false) - -// implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_triangular_matrix_matrix_trmm \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \ - }; \ -\ - static void run( \ - Index _rows, Index _cols, Index _depth, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha, level3_blocking& blocking) \ - { \ - Index diagSize = (std::min)(_rows,_depth); \ - Index rows = IsLower ? 
_rows : diagSize; \ - Index depth = IsLower ? diagSize : _depth; \ - Index cols = _cols; \ -\ - typedef Matrix MatrixLhs; \ - typedef Matrix MatrixRhs; \ -\ -/* Non-square case - doesn't fit to MKL ?TRMM. Fall to default triangular product or call MKL ?GEMM*/ \ - if (rows != depth) { \ -\ - int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ -\ - if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \ - /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ - product_triangular_matrix_matrix::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ - /*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \ - } else { \ - /* Make sense to call GEMM */ \ - Map > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \ - MatrixLhs aa_tmp=lhsMap.template triangularView(); \ - MKL_INT aStride = aa_tmp.outerStride(); \ - gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ - general_matrix_matrix_product::run( \ - rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \ -\ - /*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ - } \ - return; \ - } \ - char side = 'L', transa, uplo, diag = 'N'; \ - EIGTYPE *b; \ - const EIGTYPE *a; \ - MKL_INT m, n, lda, ldb; \ - MKLTYPE alpha_; \ -\ -/* Set alpha_*/ \ - assign_scalar_eig2mkl(alpha_, alpha); \ -\ -/* Set m, n */ \ - m = (MKL_INT)diagSize; \ - n = (MKL_INT)cols; \ -\ -/* Set trans */ \ - transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \ -\ -/* Set b, ldb */ \ - Map > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \ - MatrixX##EIGPREFIX b_tmp; \ -\ - if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ -\ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 
'U' : 'L'; \ -/* Set a, lda */ \ - Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ - MatrixLhs a_tmp; \ -\ - if ((conjA!=0) || (SetDiag==0)) { \ - if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \ - if (IsZeroDiag) \ - a_tmp.diagonal().setZero(); \ - else if (IsUnitDiag) \ - a_tmp.diagonal().setOnes();\ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _lhs; \ - lda = lhsStride; \ - } \ - /*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \ -/* call ?trmm*/ \ - MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ -\ -/* Add op(a_triangular)*b into res*/ \ - Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ - res_tmp=res_tmp+b_tmp; \ - } \ -}; - -EIGEN_MKL_TRMM_L(double, double, d, d) -EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMM_L(float, float, f, s) -EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c) - -// implements col-major += alpha * op(general) * op(triangular) -#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct product_triangular_matrix_matrix_trmm \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? Lower : Upper, \ - conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \ - }; \ -\ - static void run( \ - Index _rows, Index _cols, Index _depth, \ - const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsStride, \ - EIGTYPE* res, Index resStride, \ - EIGTYPE alpha, level3_blocking& blocking) \ - { \ - Index diagSize = (std::min)(_cols,_depth); \ - Index rows = _rows; \ - Index depth = IsLower ? _depth : diagSize; \ - Index cols = IsLower ? diagSize : _cols; \ -\ - typedef Matrix MatrixLhs; \ - typedef Matrix MatrixRhs; \ -\ -/* Non-square case - doesn't fit to MKL ?TRMM. 
Fall to default triangular product or call MKL ?GEMM*/ \ - if (cols != depth) { \ -\ - int nthr = mkl_domain_get_max_threads(EIGEN_MKL_DOMAIN_BLAS); \ -\ - if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \ - /* Most likely no benefit to call TRMM or GEMM from MKL*/ \ - product_triangular_matrix_matrix::run( \ - _rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \ - /*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \ - } else { \ - /* Make sense to call GEMM */ \ - Map > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \ - MatrixRhs aa_tmp=rhsMap.template triangularView(); \ - MKL_INT aStride = aa_tmp.outerStride(); \ - gemm_blocking_space gemm_blocking(_rows,_cols,_depth); \ - general_matrix_matrix_product::run( \ - rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \ -\ - /*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \ - } \ - return; \ - } \ - char side = 'R', transa, uplo, diag = 'N'; \ - EIGTYPE *b; \ - const EIGTYPE *a; \ - MKL_INT m, n, lda, ldb; \ - MKLTYPE alpha_; \ -\ -/* Set alpha_*/ \ - assign_scalar_eig2mkl(alpha_, alpha); \ -\ -/* Set m, n */ \ - m = (MKL_INT)rows; \ - n = (MKL_INT)diagSize; \ -\ -/* Set trans */ \ - transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \ -\ -/* Set b, ldb */ \ - Map > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \ - MatrixX##EIGPREFIX b_tmp; \ -\ - if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \ - b = b_tmp.data(); \ - ldb = b_tmp.outerStride(); \ -\ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 
'U' : 'L'; \ -/* Set a, lda */ \ - Map > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \ - MatrixRhs a_tmp; \ -\ - if ((conjA!=0) || (SetDiag==0)) { \ - if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \ - if (IsZeroDiag) \ - a_tmp.diagonal().setZero(); \ - else if (IsUnitDiag) \ - a_tmp.diagonal().setOnes();\ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _rhs; \ - lda = rhsStride; \ - } \ - /*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \ -/* call ?trmm*/ \ - MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \ -\ -/* Add op(a_triangular)*b into res*/ \ - Map > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ - res_tmp=res_tmp+b_tmp; \ - } \ -}; - -EIGEN_MKL_TRMM_R(double, double, d, d) -EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z) -EIGEN_MKL_TRMM_R(float, float, f, s) -EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixVector_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixVector_MKL.h index 09f110da7..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixVector_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/products/TriangularMatrixVector_MKL.h @@ -1,247 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Triangular matrix-vector product functionality based on ?TRMV. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H -#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H - -namespace Eigen { - -namespace internal { - -/********************************************************************** -* This file implements triangular matrix-vector multiplication using BLAS -**********************************************************************/ - -// trmv/hemv specialization - -template -struct triangular_matrix_vector_product_trmv : - triangular_matrix_vector_product {}; - -#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \ -template \ -struct triangular_matrix_vector_product { \ - static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ - triangular_matrix_vector_product_trmv::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - } \ -}; \ -template \ -struct triangular_matrix_vector_product { \ - static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \ - const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \ - triangular_matrix_vector_product_trmv::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - } \ -}; - -EIGEN_MKL_TRMV_SPECIALIZE(double) -EIGEN_MKL_TRMV_SPECIALIZE(float) -EIGEN_MKL_TRMV_SPECIALIZE(dcomplex) -EIGEN_MKL_TRMV_SPECIALIZE(scomplex) - -// implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \ -template \ -struct triangular_matrix_vector_product_trmv { \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? 
Lower : Upper \ - }; \ - static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ - { \ - if (ConjLhs || IsZeroDiag) { \ - triangular_matrix_vector_product::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - return; \ - }\ - Index size = (std::min)(_rows,_cols); \ - Index rows = IsLower ? _rows : size; \ - Index cols = IsLower ? size : _cols; \ -\ - typedef VectorX##EIGPREFIX VectorRhs; \ - EIGTYPE *x, *y;\ -\ -/* Set x*/ \ - Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ - VectorRhs x_tmp; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ -\ -/* Square part handling */\ -\ - char trans, uplo, diag; \ - MKL_INT m, n, lda, incx, incy; \ - EIGTYPE const *a; \ - MKLTYPE alpha_, beta_; \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ -\ -/* Set m, n */ \ - n = (MKL_INT)size; \ - lda = lhsStride; \ - incx = 1; \ - incy = resIncr; \ -\ -/* Set uplo, trans and diag*/ \ - trans = 'N'; \ - uplo = IsLower ? 'L' : 'U'; \ - diag = IsUnitDiag ? 'U' : 'N'; \ -\ -/* call ?TRMV*/ \ - MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ -\ -/* Add op(a_tr)rhs into res*/ \ - MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ -/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ - if (size<(std::max)(rows,cols)) { \ - typedef Matrix MatrixLhs; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ - if (size \ -struct triangular_matrix_vector_product_trmv { \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - LowUp = IsLower ? 
Lower : Upper \ - }; \ - static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \ - const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \ - { \ - if (IsZeroDiag) { \ - triangular_matrix_vector_product::run( \ - _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \ - return; \ - }\ - Index size = (std::min)(_rows,_cols); \ - Index rows = IsLower ? _rows : size; \ - Index cols = IsLower ? size : _cols; \ -\ - typedef VectorX##EIGPREFIX VectorRhs; \ - EIGTYPE *x, *y;\ -\ -/* Set x*/ \ - Map > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \ - VectorRhs x_tmp; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ -\ -/* Square part handling */\ -\ - char trans, uplo, diag; \ - MKL_INT m, n, lda, incx, incy; \ - EIGTYPE const *a; \ - MKLTYPE alpha_, beta_; \ - assign_scalar_eig2mkl(alpha_, alpha); \ - assign_scalar_eig2mkl(beta_, EIGTYPE(1)); \ -\ -/* Set m, n */ \ - n = (MKL_INT)size; \ - lda = lhsStride; \ - incx = 1; \ - incy = resIncr; \ -\ -/* Set uplo, trans and diag*/ \ - trans = ConjLhs ? 'C' : 'T'; \ - uplo = IsLower ? 'U' : 'L'; \ - diag = IsUnitDiag ? 'U' : 'N'; \ -\ -/* call ?TRMV*/ \ - MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \ -\ -/* Add op(a_tr)rhs into res*/ \ - MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \ -/* Non-square case - doesn't fit to MKL ?TRMV. Fall to default triangular product*/ \ - if (size<(std::max)(rows,cols)) { \ - typedef Matrix MatrixLhs; \ - if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ - x = x_tmp.data(); \ - if (size \ -struct triangular_solve_matrix \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 
1 : 0 \ - }; \ - static void run( \ - Index size, Index otherSize, \ - const EIGTYPE* _tri, Index triStride, \ - EIGTYPE* _other, Index otherStride, level3_blocking& /*blocking*/) \ - { \ - MKL_INT m = size, n = otherSize, lda, ldb; \ - char side = 'L', uplo, diag='N', transa; \ - /* Set alpha_ */ \ - MKLTYPE alpha; \ - EIGTYPE myone(1); \ - assign_scalar_eig2mkl(alpha, myone); \ - ldb = otherStride;\ -\ - const EIGTYPE *a; \ -/* Set trans */ \ - transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ -/* Set a, lda */ \ - typedef Matrix MatrixTri; \ - Map > tri(_tri,size,size,OuterStride<>(triStride)); \ - MatrixTri a_tmp; \ -\ - if (conjA) { \ - a_tmp = tri.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _tri; \ - lda = triStride; \ - } \ - if (IsUnitDiag) diag='U'; \ -/* call ?trsm*/ \ - MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ - } \ -}; - -EIGEN_MKL_TRSM_L(double, double, d) -EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z) -EIGEN_MKL_TRSM_L(float, float, s) -EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c) - - -// implements RightSide general * op(triangular)^-1 -#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template \ -struct triangular_solve_matrix \ -{ \ - enum { \ - IsLower = (Mode&Lower) == Lower, \ - IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \ - IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \ - conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 
1 : 0 \ - }; \ - static void run( \ - Index size, Index otherSize, \ - const EIGTYPE* _tri, Index triStride, \ - EIGTYPE* _other, Index otherStride, level3_blocking& /*blocking*/) \ - { \ - MKL_INT m = otherSize, n = size, lda, ldb; \ - char side = 'R', uplo, diag='N', transa; \ - /* Set alpha_ */ \ - MKLTYPE alpha; \ - EIGTYPE myone(1); \ - assign_scalar_eig2mkl(alpha, myone); \ - ldb = otherStride;\ -\ - const EIGTYPE *a; \ -/* Set trans */ \ - transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \ -/* Set uplo */ \ - uplo = IsLower ? 'L' : 'U'; \ - if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \ -/* Set a, lda */ \ - typedef Matrix MatrixTri; \ - Map > tri(_tri,size,size,OuterStride<>(triStride)); \ - MatrixTri a_tmp; \ -\ - if (conjA) { \ - a_tmp = tri.conjugate(); \ - a = a_tmp.data(); \ - lda = a_tmp.outerStride(); \ - } else { \ - a = _tri; \ - lda = triStride; \ - } \ - if (IsUnitDiag) diag='U'; \ -/* call ?trsm*/ \ - MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \ - /*std::cout << "TRMS_L specialization!\n";*/ \ - } \ -}; - -EIGEN_MKL_TRSM_R(double, double, d) -EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z) -EIGEN_MKL_TRSM_R(float, float, s) -EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c) - - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/util/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Core/util/CMakeLists.txt index a1e2e521f..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/util/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/util/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_util_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_util_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Core/util/NonMPL2.h 
b/thirdparty/eigen-3.2.10/Eigen/src/Core/util/NonMPL2.h index 1af67cf18..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Core/util/NonMPL2.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Core/util/NonMPL2.h @@ -1,3 +0,0 @@ -#ifdef EIGEN_MPL2_ONLY -#error Including non-MPL2 code in EIGEN_MPL2_ONLY mode -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Block.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Block.h index 604456f40..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Block.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Block.h @@ -1,126 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BLOCK2_H -#define EIGEN_BLOCK2_H - -namespace Eigen { - -/** \returns a dynamic-size expression of a corner of *this. - * - * \param type the type of corner. Can be \a Eigen::TopLeft, \a Eigen::TopRight, - * \a Eigen::BottomLeft, \a Eigen::BottomRight. - * \param cRows the number of rows in the corner - * \param cCols the number of columns in the corner - * - * Example: \include MatrixBase_corner_enum_int_int.cpp - * Output: \verbinclude MatrixBase_corner_enum_int_int.out - * - * \note Even though the returned expression has dynamic size, in the case - * when it is applied to a fixed-size matrix, it inherits a fixed maximal size, - * which means that evaluating it does not cause a dynamic memory allocation. 
- * - * \sa class Block, block(Index,Index,Index,Index) - */ -template -inline Block DenseBase - ::corner(CornerType type, Index cRows, Index cCols) -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0, cRows, cCols); - case TopRight: - return Block(derived(), 0, cols() - cCols, cRows, cCols); - case BottomLeft: - return Block(derived(), rows() - cRows, 0, cRows, cCols); - case BottomRight: - return Block(derived(), rows() - cRows, cols() - cCols, cRows, cCols); - } -} - -/** This is the const version of corner(CornerType, Index, Index).*/ -template -inline const Block -DenseBase::corner(CornerType type, Index cRows, Index cCols) const -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0, cRows, cCols); - case TopRight: - return Block(derived(), 0, cols() - cCols, cRows, cCols); - case BottomLeft: - return Block(derived(), rows() - cRows, 0, cRows, cCols); - case BottomRight: - return Block(derived(), rows() - cRows, cols() - cCols, cRows, cCols); - } -} - -/** \returns a fixed-size expression of a corner of *this. - * - * \param type the type of corner. Can be \a Eigen::TopLeft, \a Eigen::TopRight, - * \a Eigen::BottomLeft, \a Eigen::BottomRight. - * - * The template parameters CRows and CCols arethe number of rows and columns in the corner. 
- * - * Example: \include MatrixBase_template_int_int_corner_enum.cpp - * Output: \verbinclude MatrixBase_template_int_int_corner_enum.out - * - * \sa class Block, block(Index,Index,Index,Index) - */ -template -template -inline Block -DenseBase::corner(CornerType type) -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0); - case TopRight: - return Block(derived(), 0, cols() - CCols); - case BottomLeft: - return Block(derived(), rows() - CRows, 0); - case BottomRight: - return Block(derived(), rows() - CRows, cols() - CCols); - } -} - -/** This is the const version of corner(CornerType).*/ -template -template -inline const Block -DenseBase::corner(CornerType type) const -{ - switch(type) - { - default: - eigen_assert(false && "Bad corner type."); - case TopLeft: - return Block(derived(), 0, 0); - case TopRight: - return Block(derived(), 0, cols() - CCols); - case BottomLeft: - return Block(derived(), rows() - CRows, 0); - case BottomRight: - return Block(derived(), rows() - CRows, cols() - CCols); - } -} - -} // end namespace Eigen - -#endif // EIGEN_BLOCK2_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CMakeLists.txt index 7ae41b3cb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CMakeLists.txt @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_Eigen2Support_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Eigen2Support_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigen2Support COMPONENT Devel - ) - -ADD_SUBDIRECTORY(Geometry) \ No newline at end of file diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Cwise.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Cwise.h index d95009b6e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Cwise.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Cwise.h @@ 
-1,192 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CWISE_H -#define EIGEN_CWISE_H - -namespace Eigen { - -/** \internal - * convenient macro to defined the return type of a cwise binary operation */ -#define EIGEN_CWISE_BINOP_RETURN_TYPE(OP) \ - CwiseBinaryOp::Scalar>, ExpressionType, OtherDerived> - -/** \internal - * convenient macro to defined the return type of a cwise unary operation */ -#define EIGEN_CWISE_UNOP_RETURN_TYPE(OP) \ - CwiseUnaryOp::Scalar>, ExpressionType> - -/** \internal - * convenient macro to defined the return type of a cwise comparison to a scalar */ -#define EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(OP) \ - CwiseBinaryOp::Scalar>, ExpressionType, \ - typename ExpressionType::ConstantReturnType > - -/** \class Cwise - * - * \brief Pseudo expression providing additional coefficient-wise operations - * - * \param ExpressionType the type of the object on which to do coefficient-wise operations - * - * This class represents an expression with additional coefficient-wise features. - * It is the return type of MatrixBase::cwise() - * and most of the time this is the only way it is used. - * - * Example: \include MatrixBase_cwise_const.cpp - * Output: \verbinclude MatrixBase_cwise_const.out - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_CWISE_PLUGIN. 
- * - * \sa MatrixBase::cwise() const, MatrixBase::cwise() - */ -template class Cwise -{ - public: - - typedef typename internal::traits::Scalar Scalar; - typedef typename internal::conditional::ret, - ExpressionType, const ExpressionType&>::type ExpressionTypeNested; - typedef CwiseUnaryOp, ExpressionType> ScalarAddReturnType; - - inline Cwise(const ExpressionType& matrix) : m_matrix(matrix) {} - - /** \internal */ - inline const ExpressionType& _expression() const { return m_matrix; } - - template - const EIGEN_CWISE_PRODUCT_RETURN_TYPE(ExpressionType,OtherDerived) - operator*(const MatrixBase &other) const; - - template - const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op) - operator/(const MatrixBase &other) const; - - /** \deprecated ArrayBase::min() */ - template - const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op) - (min)(const MatrixBase &other) const - { return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_min_op)(_expression(), other.derived()); } - - /** \deprecated ArrayBase::max() */ - template - const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op) - (max)(const MatrixBase &other) const - { return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_max_op)(_expression(), other.derived()); } - - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs_op) abs() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs2_op) abs2() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_square_op) square() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cube_op) cube() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_inverse_op) inverse() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sqrt_op) sqrt() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_exp_op) exp() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_log_op) log() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cos_op) cos() const; - const 
EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sin_op) sin() const; - const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_pow_op) pow(const Scalar& exponent) const; - - const ScalarAddReturnType - operator+(const Scalar& scalar) const; - - /** \relates Cwise */ - friend const ScalarAddReturnType - operator+(const Scalar& scalar, const Cwise& mat) - { return mat + scalar; } - - ExpressionType& operator+=(const Scalar& scalar); - - const ScalarAddReturnType - operator-(const Scalar& scalar) const; - - ExpressionType& operator-=(const Scalar& scalar); - - template - inline ExpressionType& operator*=(const MatrixBase &other); - - template - inline ExpressionType& operator/=(const MatrixBase &other); - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less) - operator<(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less_equal) - operator<=(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater) - operator>(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater_equal) - operator>=(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::equal_to) - operator==(const MatrixBase& other) const; - - template const EIGEN_CWISE_BINOP_RETURN_TYPE(std::not_equal_to) - operator!=(const MatrixBase& other) const; - - // comparisons to a scalar value - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less) - operator<(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less_equal) - operator<=(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater) - operator>(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater_equal) - operator>=(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::equal_to) - operator==(Scalar s) const; - - const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to) - operator!=(Scalar s) const; - - // allow to extend Cwise 
outside Eigen - #ifdef EIGEN_CWISE_PLUGIN - #include EIGEN_CWISE_PLUGIN - #endif - - protected: - ExpressionTypeNested m_matrix; -}; - - -/** \returns a Cwise wrapper of *this providing additional coefficient-wise operations - * - * Example: \include MatrixBase_cwise_const.cpp - * Output: \verbinclude MatrixBase_cwise_const.out - * - * \sa class Cwise, cwise() - */ -template -inline const Cwise MatrixBase::cwise() const -{ - return derived(); -} - -/** \returns a Cwise wrapper of *this providing additional coefficient-wise operations - * - * Example: \include MatrixBase_cwise.cpp - * Output: \verbinclude MatrixBase_cwise.out - * - * \sa class Cwise, cwise() const - */ -template -inline Cwise MatrixBase::cwise() -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_CWISE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CwiseOperators.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CwiseOperators.h index 482f30648..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CwiseOperators.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/CwiseOperators.h @@ -1,298 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_ARRAY_CWISE_OPERATORS_H -#define EIGEN_ARRAY_CWISE_OPERATORS_H - -namespace Eigen { - -/*************************************************************************** -* The following functions were defined in Core -***************************************************************************/ - - -/** \deprecated ArrayBase::abs() */ -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs_op) -Cwise::abs() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::abs2() */ -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_abs2_op) -Cwise::abs2() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::exp() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_exp_op) -Cwise::exp() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::log() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_log_op) -Cwise::log() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::operator*() */ -template -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(ExpressionType,OtherDerived) -Cwise::operator*(const MatrixBase &other) const -{ - return EIGEN_CWISE_PRODUCT_RETURN_TYPE(ExpressionType,OtherDerived)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator/() */ -template -template -EIGEN_STRONG_INLINE const EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op) -Cwise::operator/(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(internal::scalar_quotient_op)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator*=() */ -template -template -inline ExpressionType& Cwise::operator*=(const MatrixBase &other) -{ - return m_matrix.const_cast_derived() = *this * other; -} - -/** \deprecated ArrayBase::operator/=() */ -template -template -inline ExpressionType& Cwise::operator/=(const MatrixBase &other) -{ - return m_matrix.const_cast_derived() = 
*this / other; -} - -/*************************************************************************** -* The following functions were defined in Array -***************************************************************************/ - -// -- unary operators -- - -/** \deprecated ArrayBase::sqrt() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sqrt_op) -Cwise::sqrt() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::cos() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cos_op) -Cwise::cos() const -{ - return _expression(); -} - - -/** \deprecated ArrayBase::sin() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_sin_op) -Cwise::sin() const -{ - return _expression(); -} - - -/** \deprecated ArrayBase::log() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_pow_op) -Cwise::pow(const Scalar& exponent) const -{ - return EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_pow_op)(_expression(), internal::scalar_pow_op(exponent)); -} - - -/** \deprecated ArrayBase::inverse() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_inverse_op) -Cwise::inverse() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::square() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_square_op) -Cwise::square() const -{ - return _expression(); -} - -/** \deprecated ArrayBase::cube() */ -template -inline const EIGEN_CWISE_UNOP_RETURN_TYPE(internal::scalar_cube_op) -Cwise::cube() const -{ - return _expression(); -} - - -// -- binary operators -- - -/** \deprecated ArrayBase::operator<() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less) -Cwise::operator<(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::less)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::<=() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::less_equal) -Cwise::operator<=(const 
MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::less_equal)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator>() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater) -Cwise::operator>(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator>=() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater_equal) -Cwise::operator>=(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::greater_equal)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator==() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::equal_to) -Cwise::operator==(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::equal_to)(_expression(), other.derived()); -} - -/** \deprecated ArrayBase::operator!=() */ -template -template -inline const EIGEN_CWISE_BINOP_RETURN_TYPE(std::not_equal_to) -Cwise::operator!=(const MatrixBase &other) const -{ - return EIGEN_CWISE_BINOP_RETURN_TYPE(std::not_equal_to)(_expression(), other.derived()); -} - -// comparisons to scalar value - -/** \deprecated ArrayBase::operator<(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less) -Cwise::operator<(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator<=(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less_equal) -Cwise::operator<=(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::less_equal)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator>(Scalar) */ -template -inline const 
EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater) -Cwise::operator>(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator>=(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater_equal) -Cwise::operator>=(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::greater_equal)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator==(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::equal_to) -Cwise::operator==(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::equal_to)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -/** \deprecated ArrayBase::operator!=(Scalar) */ -template -inline const EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to) -Cwise::operator!=(Scalar s) const -{ - return EIGEN_CWISE_COMP_TO_SCALAR_RETURN_TYPE(std::not_equal_to)(_expression(), - typename ExpressionType::ConstantReturnType(_expression().rows(), _expression().cols(), s)); -} - -// scalar addition - -/** \deprecated ArrayBase::operator+(Scalar) */ -template -inline const typename Cwise::ScalarAddReturnType -Cwise::operator+(const Scalar& scalar) const -{ - return typename Cwise::ScalarAddReturnType(m_matrix, internal::scalar_add_op(scalar)); -} - -/** \deprecated ArrayBase::operator+=(Scalar) */ -template -inline ExpressionType& Cwise::operator+=(const Scalar& scalar) -{ - return m_matrix.const_cast_derived() = *this + scalar; -} - -/** \deprecated ArrayBase::operator-(Scalar) */ -template -inline const typename Cwise::ScalarAddReturnType -Cwise::operator-(const Scalar& scalar) const -{ - return *this + (-scalar); -} - -/** \deprecated 
ArrayBase::operator-=(Scalar) */ -template -inline ExpressionType& Cwise::operator-=(const Scalar& scalar) -{ - return m_matrix.const_cast_derived() = *this - scalar; -} - -} // end namespace Eigen - -#endif // EIGEN_ARRAY_CWISE_OPERATORS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AlignedBox.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AlignedBox.h index 2e4309dd9..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AlignedBox.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AlignedBox.h @@ -1,159 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * \nonstableyet - * - * \class AlignedBox - * - * \brief An axis aligned box - * - * \param _Scalar the type of the scalar coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. - * - * This class represents an axis aligned box as a pair of the minimal and maximal corners. - */ -template -class AlignedBox -{ -public: -EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - /** Default constructor initializing a null box. */ - inline AlignedBox() - { if (AmbientDimAtCompileTime!=Dynamic) setNull(); } - - /** Constructs a null box with \a _dim the dimension of the ambient space. 
*/ - inline explicit AlignedBox(int _dim) : m_min(_dim), m_max(_dim) - { setNull(); } - - /** Constructs a box with extremities \a _min and \a _max. */ - inline AlignedBox(const VectorType& _min, const VectorType& _max) : m_min(_min), m_max(_max) {} - - /** Constructs a box containing a single point \a p. */ - inline explicit AlignedBox(const VectorType& p) : m_min(p), m_max(p) {} - - ~AlignedBox() {} - - /** \returns the dimension in which the box holds */ - inline int dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size()-1 : AmbientDimAtCompileTime; } - - /** \returns true if the box is null, i.e, empty. */ - inline bool isNull() const { return (m_min.cwise() > m_max).any(); } - - /** Makes \c *this a null/empty box. */ - inline void setNull() - { - m_min.setConstant( (std::numeric_limits::max)()); - m_max.setConstant(-(std::numeric_limits::max)()); - } - - /** \returns the minimal corner */ - inline const VectorType& (min)() const { return m_min; } - /** \returns a non const reference to the minimal corner */ - inline VectorType& (min)() { return m_min; } - /** \returns the maximal corner */ - inline const VectorType& (max)() const { return m_max; } - /** \returns a non const reference to the maximal corner */ - inline VectorType& (max)() { return m_max; } - - /** \returns true if the point \a p is inside the box \c *this. */ - inline bool contains(const VectorType& p) const - { return (m_min.cwise()<=p).all() && (p.cwise()<=m_max).all(); } - - /** \returns true if the box \a b is entirely inside the box \c *this. */ - inline bool contains(const AlignedBox& b) const - { return (m_min.cwise()<=(b.min)()).all() && ((b.max)().cwise()<=m_max).all(); } - - /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. 
*/ - inline AlignedBox& extend(const VectorType& p) - { m_min = (m_min.cwise().min)(p); m_max = (m_max.cwise().max)(p); return *this; } - - /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. */ - inline AlignedBox& extend(const AlignedBox& b) - { m_min = (m_min.cwise().min)(b.m_min); m_max = (m_max.cwise().max)(b.m_max); return *this; } - - /** Clamps \c *this by the box \a b and returns a reference to \c *this. */ - inline AlignedBox& clamp(const AlignedBox& b) - { m_min = (m_min.cwise().max)(b.m_min); m_max = (m_max.cwise().min)(b.m_max); return *this; } - - /** Translate \c *this by the vector \a t and returns a reference to \c *this. */ - inline AlignedBox& translate(const VectorType& t) - { m_min += t; m_max += t; return *this; } - - /** \returns the squared distance between the point \a p and the box \c *this, - * and zero if \a p is inside the box. - * \sa exteriorDistance() - */ - inline Scalar squaredExteriorDistance(const VectorType& p) const; - - /** \returns the distance between the point \a p and the box \c *this, - * and zero if \a p is inside the box. - * \sa squaredExteriorDistance() - */ - inline Scalar exteriorDistance(const VectorType& p) const - { return ei_sqrt(squaredExteriorDistance(p)); } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { - return typename internal::cast_return_type >::type(*this); - } - - /** Copy constructor with scalar type conversion */ - template - inline explicit AlignedBox(const AlignedBox& other) - { - m_min = (other.min)().template cast(); - m_max = (other.max)().template cast(); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const AlignedBox& other, typename NumTraits::Real prec = precision()) const - { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); } - -protected: - - VectorType m_min, m_max; -}; - -template -inline Scalar AlignedBox::squaredExteriorDistance(const VectorType& p) const -{ - Scalar dist2(0); - Scalar aux; - for (int k=0; k - -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - -#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS -#include "RotationBase.h" -#include "Rotation2D.h" -#include "Quaternion.h" -#include "AngleAxis.h" -#include "Transform.h" -#include "Translation.h" -#include "Scaling.h" -#include "AlignedBox.h" -#include "Hyperplane.h" -#include "ParametrizedLine.h" -#endif - - -#define RotationBase eigen2_RotationBase -#define Rotation2D eigen2_Rotation2D -#define Rotation2Df eigen2_Rotation2Df -#define Rotation2Dd eigen2_Rotation2Dd - -#define Quaternion eigen2_Quaternion -#define Quaternionf eigen2_Quaternionf -#define Quaterniond eigen2_Quaterniond - -#define AngleAxis eigen2_AngleAxis -#define AngleAxisf eigen2_AngleAxisf -#define AngleAxisd eigen2_AngleAxisd - -#define Transform eigen2_Transform -#define Transform2f eigen2_Transform2f -#define Transform2d eigen2_Transform2d -#define Transform3f eigen2_Transform3f -#define Transform3d eigen2_Transform3d - -#define Translation eigen2_Translation -#define Translation2f eigen2_Translation2f -#define Translation2d eigen2_Translation2d -#define Translation3f eigen2_Translation3f -#define Translation3d eigen2_Translation3d - -#define Scaling eigen2_Scaling -#define Scaling2f eigen2_Scaling2f -#define Scaling2d eigen2_Scaling2d -#define Scaling3f eigen2_Scaling3f -#define Scaling3d eigen2_Scaling3d - -#define AlignedBox eigen2_AlignedBox - -#define Hyperplane eigen2_Hyperplane -#define ParametrizedLine eigen2_ParametrizedLine - -#define ei_toRotationMatrix eigen2_ei_toRotationMatrix -#define 
ei_quaternion_assign_impl eigen2_ei_quaternion_assign_impl -#define ei_transform_product_impl eigen2_ei_transform_product_impl - -#include "RotationBase.h" -#include "Rotation2D.h" -#include "Quaternion.h" -#include "AngleAxis.h" -#include "Transform.h" -#include "Translation.h" -#include "Scaling.h" -#include "AlignedBox.h" -#include "Hyperplane.h" -#include "ParametrizedLine.h" - -#undef ei_toRotationMatrix -#undef ei_quaternion_assign_impl -#undef ei_transform_product_impl - -#undef RotationBase -#undef Rotation2D -#undef Rotation2Df -#undef Rotation2Dd - -#undef Quaternion -#undef Quaternionf -#undef Quaterniond - -#undef AngleAxis -#undef AngleAxisf -#undef AngleAxisd - -#undef Transform -#undef Transform2f -#undef Transform2d -#undef Transform3f -#undef Transform3d - -#undef Translation -#undef Translation2f -#undef Translation2d -#undef Translation3f -#undef Translation3d - -#undef Scaling -#undef Scaling2f -#undef Scaling2d -#undef Scaling3f -#undef Scaling3d - -#undef AlignedBox - -#undef Hyperplane -#undef ParametrizedLine - -#endif // EIGEN2_GEOMETRY_MODULE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AngleAxis.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AngleAxis.h index af598a403..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AngleAxis.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/AngleAxis.h @@ -1,214 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class AngleAxis - * - * \brief Represents a 3D rotation as a rotation angle around an arbitrary 3D axis - * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * - * The following two typedefs are provided for convenience: - * \li \c AngleAxisf for \c float - * \li \c AngleAxisd for \c double - * - * \addexample AngleAxisForEuler \label How to define a rotation from Euler-angles - * - * Combined with MatrixBase::Unit{X,Y,Z}, AngleAxis can be used to easily - * mimic Euler-angles. Here is an example: - * \include AngleAxis_mimic_euler.cpp - * Output: \verbinclude AngleAxis_mimic_euler.out - * - * \note This class is not aimed to be used to store a rotation transformation, - * but rather to make easier the creation of other rotation (Quaternion, rotation Matrix) - * and transformation objects. - * - * \sa class Quaternion, class Transform, MatrixBase::UnitX() - */ - -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class AngleAxis : public RotationBase,3> -{ - typedef RotationBase,3> Base; - -public: - - using Base::operator*; - - enum { Dim = 3 }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - typedef Matrix Matrix3; - typedef Matrix Vector3; - typedef Quaternion QuaternionType; - -protected: - - Vector3 m_axis; - Scalar m_angle; - -public: - - /** Default constructor without initialization. */ - AngleAxis() {} - /** Constructs and initialize the angle-axis rotation from an \a angle in radian - * and an \a axis which must be normalized. */ - template - inline AngleAxis(Scalar angle, const MatrixBase& axis) : m_axis(axis), m_angle(angle) {} - /** Constructs and initialize the angle-axis rotation from a quaternion \a q. 
*/ - inline AngleAxis(const QuaternionType& q) { *this = q; } - /** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. */ - template - inline explicit AngleAxis(const MatrixBase& m) { *this = m; } - - Scalar angle() const { return m_angle; } - Scalar& angle() { return m_angle; } - - const Vector3& axis() const { return m_axis; } - Vector3& axis() { return m_axis; } - - /** Concatenates two rotations */ - inline QuaternionType operator* (const AngleAxis& other) const - { return QuaternionType(*this) * QuaternionType(other); } - - /** Concatenates two rotations */ - inline QuaternionType operator* (const QuaternionType& other) const - { return QuaternionType(*this) * other; } - - /** Concatenates two rotations */ - friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b) - { return a * QuaternionType(b); } - - /** Concatenates two rotations */ - inline Matrix3 operator* (const Matrix3& other) const - { return toRotationMatrix() * other; } - - /** Concatenates two rotations */ - inline friend Matrix3 operator* (const Matrix3& a, const AngleAxis& b) - { return a * b.toRotationMatrix(); } - - /** Applies rotation to vector */ - inline Vector3 operator* (const Vector3& other) const - { return toRotationMatrix() * other; } - - /** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */ - AngleAxis inverse() const - { return AngleAxis(-m_angle, m_axis); } - - AngleAxis& operator=(const QuaternionType& q); - template - AngleAxis& operator=(const MatrixBase& m); - - template - AngleAxis& fromRotationMatrix(const MatrixBase& m); - Matrix3 toRotationMatrix(void) const; - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. 
- */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit AngleAxis(const AngleAxis& other) - { - m_axis = other.axis().template cast(); - m_angle = Scalar(other.angle()); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const AngleAxis& other, typename NumTraits::Real prec = precision()) const - { return m_axis.isApprox(other.m_axis, prec) && ei_isApprox(m_angle,other.m_angle, prec); } -}; - -/** \ingroup Geometry_Module - * single precision angle-axis type */ -typedef AngleAxis AngleAxisf; -/** \ingroup Geometry_Module - * double precision angle-axis type */ -typedef AngleAxis AngleAxisd; - -/** Set \c *this from a quaternion. - * The axis is normalized. - */ -template -AngleAxis& AngleAxis::operator=(const QuaternionType& q) -{ - Scalar n2 = q.vec().squaredNorm(); - if (n2 < precision()*precision()) - { - m_angle = 0; - m_axis << 1, 0, 0; - } - else - { - m_angle = 2*std::acos(q.w()); - m_axis = q.vec() / ei_sqrt(n2); - } - return *this; -} - -/** Set \c *this from a 3x3 rotation matrix \a mat. - */ -template -template -AngleAxis& AngleAxis::operator=(const MatrixBase& mat) -{ - // Since a direct conversion would not be really faster, - // let's use the robust Quaternion implementation: - return *this = QuaternionType(mat); -} - -/** Constructs and \returns an equivalent 3x3 rotation matrix. 
- */ -template -typename AngleAxis::Matrix3 -AngleAxis::toRotationMatrix(void) const -{ - Matrix3 res; - Vector3 sin_axis = ei_sin(m_angle) * m_axis; - Scalar c = ei_cos(m_angle); - Vector3 cos1_axis = (Scalar(1)-c) * m_axis; - - Scalar tmp; - tmp = cos1_axis.x() * m_axis.y(); - res.coeffRef(0,1) = tmp - sin_axis.z(); - res.coeffRef(1,0) = tmp + sin_axis.z(); - - tmp = cos1_axis.x() * m_axis.z(); - res.coeffRef(0,2) = tmp + sin_axis.y(); - res.coeffRef(2,0) = tmp - sin_axis.y(); - - tmp = cos1_axis.y() * m_axis.z(); - res.coeffRef(1,2) = tmp - sin_axis.x(); - res.coeffRef(2,1) = tmp + sin_axis.x(); - - res.diagonal() = (cos1_axis.cwise() * m_axis).cwise() + c; - - return res; -} - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/CMakeLists.txt index c347a8f26..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Eigen2Support_Geometry_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Eigen2Support_Geometry_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigen2Support/Geometry - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Hyperplane.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Hyperplane.h index b95bf00ec..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Hyperplane.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Hyperplane.h @@ -1,254 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Hyperplane - * - * \brief A hyperplane - * - * A hyperplane is an affine subspace of dimension n-1 in a space of dimension n. - * For example, a hyperplane in a plane is a line; a hyperplane in 3-space is a plane. - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. - * Notice that the dimension of the hyperplane is _AmbientDim-1. - * - * This class represents an hyperplane as the zero set of the implicit equation - * \f$ n \cdot x + d = 0 \f$ where \f$ n \f$ is a unit normal vector of the plane (linear part) - * and \f$ d \f$ is the distance (offset) to the origin. - */ -template -class Hyperplane -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim==Dynamic ? Dynamic : _AmbientDim+1) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix Coefficients; - typedef Block NormalReturnType; - - /** Default constructor without initialization */ - inline Hyperplane() {} - - /** Constructs a dynamic-size hyperplane with \a _dim the dimension - * of the ambient space */ - inline explicit Hyperplane(int _dim) : m_coeffs(_dim+1) {} - - /** Construct a plane from its normal \a n and a point \a e onto the plane. - * \warning the vector normal is assumed to be normalized. - */ - inline Hyperplane(const VectorType& n, const VectorType& e) - : m_coeffs(n.size()+1) - { - normal() = n; - offset() = -e.eigen2_dot(n); - } - - /** Constructs a plane from its normal \a n and distance to the origin \a d - * such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$. 
- * \warning the vector normal is assumed to be normalized. - */ - inline Hyperplane(const VectorType& n, Scalar d) - : m_coeffs(n.size()+1) - { - normal() = n; - offset() = d; - } - - /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space - * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made. - */ - static inline Hyperplane Through(const VectorType& p0, const VectorType& p1) - { - Hyperplane result(p0.size()); - result.normal() = (p1 - p0).unitOrthogonal(); - result.offset() = -result.normal().eigen2_dot(p0); - return result; - } - - /** Constructs a hyperplane passing through the three points. The dimension of the ambient space - * is required to be exactly 3. - */ - static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2) - { - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3) - Hyperplane result(p0.size()); - result.normal() = (p2 - p0).cross(p1 - p0).normalized(); - result.offset() = -result.normal().eigen2_dot(p0); - return result; - } - - /** Constructs a hyperplane passing through the parametrized line \a parametrized. - * If the dimension of the ambient space is greater than 2, then there isn't uniqueness, - * so an arbitrary choice is made. - */ - // FIXME to be consitent with the rest this could be implemented as a static Through function ?? - explicit Hyperplane(const ParametrizedLine& parametrized) - { - normal() = parametrized.direction().unitOrthogonal(); - offset() = -normal().eigen2_dot(parametrized.origin()); - } - - ~Hyperplane() {} - - /** \returns the dimension in which the plane holds */ - inline int dim() const { return int(AmbientDimAtCompileTime)==Dynamic ? m_coeffs.size()-1 : int(AmbientDimAtCompileTime); } - - /** normalizes \c *this */ - void normalize(void) - { - m_coeffs /= normal().norm(); - } - - /** \returns the signed distance between the plane \c *this and a point \a p. 
- * \sa absDistance() - */ - inline Scalar signedDistance(const VectorType& p) const { return p.eigen2_dot(normal()) + offset(); } - - /** \returns the absolute distance between the plane \c *this and a point \a p. - * \sa signedDistance() - */ - inline Scalar absDistance(const VectorType& p) const { return ei_abs(signedDistance(p)); } - - /** \returns the projection of a point \a p onto the plane \c *this. - */ - inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); } - - /** \returns a constant reference to the unit normal vector of the plane, which corresponds - * to the linear part of the implicit equation. - */ - inline const NormalReturnType normal() const { return NormalReturnType(*const_cast(&m_coeffs),0,0,dim(),1); } - - /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds - * to the linear part of the implicit equation. - */ - inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); } - - /** \returns the distance to the origin, which is also the "constant term" of the implicit equation - * \warning the vector normal is assumed to be normalized. - */ - inline const Scalar& offset() const { return m_coeffs.coeff(dim()); } - - /** \returns a non-constant reference to the distance to the origin, which is also the constant part - * of the implicit equation */ - inline Scalar& offset() { return m_coeffs(dim()); } - - /** \returns a constant reference to the coefficients c_i of the plane equation: - * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$ - */ - inline const Coefficients& coeffs() const { return m_coeffs; } - - /** \returns a non-constant reference to the coefficients c_i of the plane equation: - * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$ - */ - inline Coefficients& coeffs() { return m_coeffs; } - - /** \returns the intersection of *this with \a other. - * - * \warning The ambient space must be a plane, i.e. 
have dimension 2, so that \c *this and \a other are lines. - * - * \note If \a other is approximately parallel to *this, this method will return any point on *this. - */ - VectorType intersection(const Hyperplane& other) - { - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) - Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0); - // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests - // whether the two lines are approximately parallel. - if(ei_isMuchSmallerThan(det, Scalar(1))) - { // special case where the two lines are approximately parallel. Pick any point on the first line. - if(ei_abs(coeffs().coeff(1))>ei_abs(coeffs().coeff(0))) - return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0)); - else - return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0)); - } - else - { // general case - Scalar invdet = Scalar(1) / det; - return VectorType(invdet*(coeffs().coeff(1)*other.coeffs().coeff(2)-other.coeffs().coeff(1)*coeffs().coeff(2)), - invdet*(other.coeffs().coeff(0)*coeffs().coeff(2)-coeffs().coeff(0)*other.coeffs().coeff(2))); - } - } - - /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. - * - * \param mat the Dim x Dim transformation matrix - * \param traits specifies whether the matrix \a mat represents an Isometry - * or a more generic Affine transformation. The default is Affine. - */ - template - inline Hyperplane& transform(const MatrixBase& mat, TransformTraits traits = Affine) - { - if (traits==Affine) - normal() = mat.inverse().transpose() * normal(); - else if (traits==Isometry) - normal() = mat * normal(); - else - { - ei_assert("invalid traits value in Hyperplane::transform()"); - } - return *this; - } - - /** Applies the transformation \a t to \c *this and returns a reference to \c *this. 
- * - * \param t the transformation of dimension Dim - * \param traits specifies whether the transformation \a t represents an Isometry - * or a more generic Affine transformation. The default is Affine. - * Other kind of transformations are not supported. - */ - inline Hyperplane& transform(const Transform& t, - TransformTraits traits = Affine) - { - transform(t.linear(), traits); - offset() -= t.translation().eigen2_dot(normal()); - return *this; - } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { - return typename internal::cast_return_type >::type(*this); - } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Hyperplane(const Hyperplane& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Hyperplane& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -protected: - - Coefficients m_coeffs; -}; - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h index 9b57b7e0b..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h @@ -1,141 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class ParametrizedLine - * - * \brief A parametrized line - * - * A parametrized line is defined by an origin point \f$ \mathbf{o} \f$ and a unit - * direction vector \f$ \mathbf{d} \f$ such that the line corresponds to - * the set \f$ l(t) = \mathbf{o} + t \mathbf{d} \f$, \f$ l \in \mathbf{R} \f$. - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _AmbientDim the dimension of the ambient space, can be a compile time value or Dynamic. - */ -template -class ParametrizedLine -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) - enum { AmbientDimAtCompileTime = _AmbientDim }; - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - /** Default constructor without initialization */ - inline ParametrizedLine() {} - - /** Constructs a dynamic-size line with \a _dim the dimension - * of the ambient space */ - inline explicit ParametrizedLine(int _dim) : m_origin(_dim), m_direction(_dim) {} - - /** Initializes a parametrized line of direction \a direction and origin \a origin. - * \warning the vector direction is assumed to be normalized. - */ - ParametrizedLine(const VectorType& origin, const VectorType& direction) - : m_origin(origin), m_direction(direction) {} - - explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane); - - /** Constructs a parametrized line going from \a p0 to \a p1. 
*/ - static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1) - { return ParametrizedLine(p0, (p1-p0).normalized()); } - - ~ParametrizedLine() {} - - /** \returns the dimension in which the line holds */ - inline int dim() const { return m_direction.size(); } - - const VectorType& origin() const { return m_origin; } - VectorType& origin() { return m_origin; } - - const VectorType& direction() const { return m_direction; } - VectorType& direction() { return m_direction; } - - /** \returns the squared distance of a point \a p to its projection onto the line \c *this. - * \sa distance() - */ - RealScalar squaredDistance(const VectorType& p) const - { - VectorType diff = p-origin(); - return (diff - diff.eigen2_dot(direction())* direction()).squaredNorm(); - } - /** \returns the distance of a point \a p to its projection onto the line \c *this. - * \sa squaredDistance() - */ - RealScalar distance(const VectorType& p) const { return ei_sqrt(squaredDistance(p)); } - - /** \returns the projection of a point \a p onto the line \c *this. */ - VectorType projection(const VectorType& p) const - { return origin() + (p-origin()).eigen2_dot(direction()) * direction(); } - - Scalar intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane); - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. 
- */ - template - inline typename internal::cast_return_type >::type cast() const - { - return typename internal::cast_return_type >::type(*this); - } - - /** Copy constructor with scalar type conversion */ - template - inline explicit ParametrizedLine(const ParametrizedLine& other) - { - m_origin = other.origin().template cast(); - m_direction = other.direction().template cast(); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const ParametrizedLine& other, typename NumTraits::Real prec = precision()) const - { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); } - -protected: - - VectorType m_origin, m_direction; -}; - -/** Constructs a parametrized line from a 2D hyperplane - * - * \warning the ambient space must have dimension 2 such that the hyperplane actually describes a line - */ -template -inline ParametrizedLine<_Scalar, _AmbientDim>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim>& hyperplane) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) - direction() = hyperplane.normal().unitOrthogonal(); - origin() = -hyperplane.normal()*hyperplane.offset(); -} - -/** \returns the parameter value of the intersection between \c *this and the given hyperplane - */ -template -inline _Scalar ParametrizedLine<_Scalar, _AmbientDim>::intersection(const Hyperplane<_Scalar, _AmbientDim>& hyperplane) -{ - return -(hyperplane.offset()+origin().eigen2_dot(hyperplane.normal())) - /(direction().eigen2_dot(hyperplane.normal())); -} - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Quaternion.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Quaternion.h index 4b6390cf1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Quaternion.h +++ 
b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Quaternion.h @@ -1,495 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -template -struct ei_quaternion_assign_impl; - -/** \geometry_module \ingroup Geometry_Module - * - * \class Quaternion - * - * \brief The quaternion class used to represent 3D orientations and rotations - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * - * This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of - * orientations and rotations of objects in three dimensions. 
Compared to other representations - * like Euler angles or 3x3 matrices, quatertions offer the following advantages: - * \li \b compact storage (4 scalars) - * \li \b efficient to compose (28 flops), - * \li \b stable spherical interpolation - * - * The following two typedefs are provided for convenience: - * \li \c Quaternionf for \c float - * \li \c Quaterniond for \c double - * - * \sa class AngleAxis, class Transform - */ - -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class Quaternion : public RotationBase,3> -{ - typedef RotationBase,3> Base; - -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,4) - - using Base::operator*; - - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - - /** the type of the Coefficients 4-vector */ - typedef Matrix Coefficients; - /** the type of a 3D vector */ - typedef Matrix Vector3; - /** the equivalent rotation matrix type */ - typedef Matrix Matrix3; - /** the equivalent angle-axis type */ - typedef AngleAxis AngleAxisType; - - /** \returns the \c x coefficient */ - inline Scalar x() const { return m_coeffs.coeff(0); } - /** \returns the \c y coefficient */ - inline Scalar y() const { return m_coeffs.coeff(1); } - /** \returns the \c z coefficient */ - inline Scalar z() const { return m_coeffs.coeff(2); } - /** \returns the \c w coefficient */ - inline Scalar w() const { return m_coeffs.coeff(3); } - - /** \returns a reference to the \c x coefficient */ - inline Scalar& x() { return m_coeffs.coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - inline Scalar& y() { return m_coeffs.coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - inline Scalar& z() { return m_coeffs.coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - inline Scalar& w() { return m_coeffs.coeffRef(3); } - - /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const Block vec() const { return 
m_coeffs.template start<3>(); } - - /** \returns a vector expression of the imaginary part (x,y,z) */ - inline Block vec() { return m_coeffs.template start<3>(); } - - /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ - inline const Coefficients& coeffs() const { return m_coeffs; } - - /** \returns a vector expression of the coefficients (x,y,z,w) */ - inline Coefficients& coeffs() { return m_coeffs; } - - /** Default constructor leaving the quaternion uninitialized. */ - inline Quaternion() {} - - /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from - * its four coefficients \a w, \a x, \a y and \a z. - * - * \warning Note the order of the arguments: the real \a w coefficient first, - * while internally the coefficients are stored in the following order: - * [\c x, \c y, \c z, \c w] - */ - inline Quaternion(Scalar w, Scalar x, Scalar y, Scalar z) - { m_coeffs << x, y, z, w; } - - /** Copy constructor */ - inline Quaternion(const Quaternion& other) { m_coeffs = other.m_coeffs; } - - /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } - - /** Constructs and initializes a quaternion from either: - * - a rotation matrix expression, - * - a 4D vector expression representing quaternion coefficients. 
- * \sa operator=(MatrixBase) - */ - template - explicit inline Quaternion(const MatrixBase& other) { *this = other; } - - Quaternion& operator=(const Quaternion& other); - Quaternion& operator=(const AngleAxisType& aa); - template - Quaternion& operator=(const MatrixBase& m); - - /** \returns a quaternion representing an identity rotation - * \sa MatrixBase::Identity() - */ - static inline Quaternion Identity() { return Quaternion(1, 0, 0, 0); } - - /** \sa Quaternion::Identity(), MatrixBase::setIdentity() - */ - inline Quaternion& setIdentity() { m_coeffs << 0, 0, 0, 1; return *this; } - - /** \returns the squared norm of the quaternion's coefficients - * \sa Quaternion::norm(), MatrixBase::squaredNorm() - */ - inline Scalar squaredNorm() const { return m_coeffs.squaredNorm(); } - - /** \returns the norm of the quaternion's coefficients - * \sa Quaternion::squaredNorm(), MatrixBase::norm() - */ - inline Scalar norm() const { return m_coeffs.norm(); } - - /** Normalizes the quaternion \c *this - * \sa normalized(), MatrixBase::normalize() */ - inline void normalize() { m_coeffs.normalize(); } - /** \returns a normalized version of \c *this - * \sa normalize(), MatrixBase::normalized() */ - inline Quaternion normalized() const { return Quaternion(m_coeffs.normalized()); } - - /** \returns the dot product of \c *this and \a other - * Geometrically speaking, the dot product of two unit quaternions - * corresponds to the cosine of half the angle between the two rotations. 
- * \sa angularDistance() - */ - inline Scalar eigen2_dot(const Quaternion& other) const { return m_coeffs.eigen2_dot(other.m_coeffs); } - - inline Scalar angularDistance(const Quaternion& other) const; - - Matrix3 toRotationMatrix(void) const; - - template - Quaternion& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - - inline Quaternion operator* (const Quaternion& q) const; - inline Quaternion& operator*= (const Quaternion& q); - - Quaternion inverse(void) const; - Quaternion conjugate(void) const; - - Quaternion slerp(Scalar t, const Quaternion& other) const; - - template - Vector3 operator* (const MatrixBase& vec) const; - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Quaternion(const Quaternion& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Quaternion& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -protected: - Coefficients m_coeffs; -}; - -/** \ingroup Geometry_Module - * single precision quaternion type */ -typedef Quaternion Quaternionf; -/** \ingroup Geometry_Module - * double precision quaternion type */ -typedef Quaternion Quaterniond; - -// Generic Quaternion * Quaternion product -template inline Quaternion -ei_quaternion_product(const Quaternion& a, const Quaternion& b) -{ - return Quaternion - ( - a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), - a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(), - a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(), - a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x() - ); -} - -/** \returns the concatenation of two rotations as a quaternion-quaternion product */ -template -inline Quaternion Quaternion::operator* (const Quaternion& other) const -{ - return ei_quaternion_product(*this,other); -} - -/** \sa operator*(Quaternion) */ -template -inline Quaternion& Quaternion::operator*= (const Quaternion& other) -{ - return (*this = *this * other); -} - -/** Rotation of a vector by a quaternion. - * \remarks If the quaternion is used to rotate several points (>1) - * then it is much more efficient to first convert it to a 3x3 Matrix. - * Comparison of the operation cost for n transformations: - * - Quaternion: 30n - * - Via a Matrix3: 24 + 15n - */ -template -template -inline typename Quaternion::Vector3 -Quaternion::operator* (const MatrixBase& v) const -{ - // Note that this algorithm comes from the optimization by hand - // of the conversion to a Matrix followed by a Matrix/Vector product. - // It appears to be much faster than the common algorithm found - // in the litterature (30 versus 39 flops). It also requires two - // Vector3 as temporaries. 
- Vector3 uv; - uv = 2 * this->vec().cross(v); - return v + this->w() * uv + this->vec().cross(uv); -} - -template -inline Quaternion& Quaternion::operator=(const Quaternion& other) -{ - m_coeffs = other.m_coeffs; - return *this; -} - -/** Set \c *this from an angle-axis \a aa and returns a reference to \c *this - */ -template -inline Quaternion& Quaternion::operator=(const AngleAxisType& aa) -{ - Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings - this->w() = ei_cos(ha); - this->vec() = ei_sin(ha) * aa.axis(); - return *this; -} - -/** Set \c *this from the expression \a xpr: - * - if \a xpr is a 4x1 vector, then \a xpr is assumed to be a quaternion - * - if \a xpr is a 3x3 matrix, then \a xpr is assumed to be rotation matrix - * and \a xpr is converted to a quaternion - */ -template -template -inline Quaternion& Quaternion::operator=(const MatrixBase& xpr) -{ - ei_quaternion_assign_impl::run(*this, xpr.derived()); - return *this; -} - -/** Convert the quaternion to a 3x3 rotation matrix */ -template -inline typename Quaternion::Matrix3 -Quaternion::toRotationMatrix(void) const -{ - // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) 
- // if not inlined then the cost of the return by value is huge ~ +35%, - // however, not inlining this function is an order of magnitude slower, so - // it has to be inlined, and so the return by value is not an issue - Matrix3 res; - - const Scalar tx = Scalar(2)*this->x(); - const Scalar ty = Scalar(2)*this->y(); - const Scalar tz = Scalar(2)*this->z(); - const Scalar twx = tx*this->w(); - const Scalar twy = ty*this->w(); - const Scalar twz = tz*this->w(); - const Scalar txx = tx*this->x(); - const Scalar txy = ty*this->x(); - const Scalar txz = tz*this->x(); - const Scalar tyy = ty*this->y(); - const Scalar tyz = tz*this->y(); - const Scalar tzz = tz*this->z(); - - res.coeffRef(0,0) = Scalar(1)-(tyy+tzz); - res.coeffRef(0,1) = txy-twz; - res.coeffRef(0,2) = txz+twy; - res.coeffRef(1,0) = txy+twz; - res.coeffRef(1,1) = Scalar(1)-(txx+tzz); - res.coeffRef(1,2) = tyz-twx; - res.coeffRef(2,0) = txz-twy; - res.coeffRef(2,1) = tyz+twx; - res.coeffRef(2,2) = Scalar(1)-(txx+tyy); - - return res; -} - -/** Sets *this to be a quaternion representing a rotation sending the vector \a a to the vector \a b. - * - * \returns a reference to *this. - * - * Note that the two input vectors do \b not have to be normalized. 
- */ -template -template -inline Quaternion& Quaternion::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) -{ - Vector3 v0 = a.normalized(); - Vector3 v1 = b.normalized(); - Scalar c = v0.eigen2_dot(v1); - - // if dot == 1, vectors are the same - if (ei_isApprox(c,Scalar(1))) - { - // set to identity - this->w() = 1; this->vec().setZero(); - return *this; - } - // if dot == -1, vectors are opposites - if (ei_isApprox(c,Scalar(-1))) - { - this->vec() = v0.unitOrthogonal(); - this->w() = 0; - return *this; - } - - Vector3 axis = v0.cross(v1); - Scalar s = ei_sqrt((Scalar(1)+c)*Scalar(2)); - Scalar invs = Scalar(1)/s; - this->vec() = axis * invs; - this->w() = s * Scalar(0.5); - - return *this; -} - -/** \returns the multiplicative inverse of \c *this - * Note that in most cases, i.e., if you simply want the opposite rotation, - * and/or the quaternion is normalized, then it is enough to use the conjugate. - * - * \sa Quaternion::conjugate() - */ -template -inline Quaternion Quaternion::inverse() const -{ - // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? - Scalar n2 = this->squaredNorm(); - if (n2 > 0) - return Quaternion(conjugate().coeffs() / n2); - else - { - // return an invalid result to flag the error - return Quaternion(Coefficients::Zero()); - } -} - -/** \returns the conjugate of the \c *this which is equal to the multiplicative inverse - * if the quaternion is normalized. - * The conjugate of a quaternion represents the opposite rotation. 
- * - * \sa Quaternion::inverse() - */ -template -inline Quaternion Quaternion::conjugate() const -{ - return Quaternion(this->w(),-this->x(),-this->y(),-this->z()); -} - -/** \returns the angle (in radian) between two rotations - * \sa eigen2_dot() - */ -template -inline Scalar Quaternion::angularDistance(const Quaternion& other) const -{ - double d = ei_abs(this->eigen2_dot(other)); - if (d>=1.0) - return 0; - return Scalar(2) * std::acos(d); -} - -/** \returns the spherical linear interpolation between the two quaternions - * \c *this and \a other at the parameter \a t - */ -template -Quaternion Quaternion::slerp(Scalar t, const Quaternion& other) const -{ - static const Scalar one = Scalar(1) - machine_epsilon(); - Scalar d = this->eigen2_dot(other); - Scalar absD = ei_abs(d); - - Scalar scale0; - Scalar scale1; - - if (absD>=one) - { - scale0 = Scalar(1) - t; - scale1 = t; - } - else - { - // theta is the angle between the 2 quaternions - Scalar theta = std::acos(absD); - Scalar sinTheta = ei_sin(theta); - - scale0 = ei_sin( ( Scalar(1) - t ) * theta) / sinTheta; - scale1 = ei_sin( ( t * theta) ) / sinTheta; - if (d<0) - scale1 = -scale1; - } - - return Quaternion(scale0 * coeffs() + scale1 * other.coeffs()); -} - -// set from a rotation matrix -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - static inline void run(Quaternion& q, const Other& mat) - { - // This algorithm comes from "Quaternion Calculus and Fast Animation", - // Ken Shoemake, 1987 SIGGRAPH course notes - Scalar t = mat.trace(); - if (t > 0) - { - t = ei_sqrt(t + Scalar(1.0)); - q.w() = Scalar(0.5)*t; - t = Scalar(0.5)/t; - q.x() = (mat.coeff(2,1) - mat.coeff(1,2)) * t; - q.y() = (mat.coeff(0,2) - mat.coeff(2,0)) * t; - q.z() = (mat.coeff(1,0) - mat.coeff(0,1)) * t; - } - else - { - int i = 0; - if (mat.coeff(1,1) > mat.coeff(0,0)) - i = 1; - if (mat.coeff(2,2) > mat.coeff(i,i)) - i = 2; - int j = (i+1)%3; - int k = (j+1)%3; - - t = 
ei_sqrt(mat.coeff(i,i)-mat.coeff(j,j)-mat.coeff(k,k) + Scalar(1.0)); - q.coeffs().coeffRef(i) = Scalar(0.5) * t; - t = Scalar(0.5)/t; - q.w() = (mat.coeff(k,j)-mat.coeff(j,k))*t; - q.coeffs().coeffRef(j) = (mat.coeff(j,i)+mat.coeff(i,j))*t; - q.coeffs().coeffRef(k) = (mat.coeff(k,i)+mat.coeff(i,k))*t; - } - } -}; - -// set from a vector of coefficients assumed to be a quaternion -template -struct ei_quaternion_assign_impl -{ - typedef typename Other::Scalar Scalar; - static inline void run(Quaternion& q, const Other& vec) - { - q.coeffs() = vec; - } -}; - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Rotation2D.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Rotation2D.h index 19b8582a1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Rotation2D.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Rotation2D.h @@ -1,145 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Rotation2D - * - * \brief Represents a rotation/orientation in a 2 dimensional space. - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * - * This class is equivalent to a single scalar representing a counter clock wise rotation - * as a single angle in radian. It provides some additional features such as the automatic - * conversion from/to a 2x2 rotation matrix. 
Moreover this class aims to provide a similar - * interface to Quaternion in order to facilitate the writing of generic algorithms - * dealing with rotations. - * - * \sa class Quaternion, class Transform - */ -template struct ei_traits > -{ - typedef _Scalar Scalar; -}; - -template -class Rotation2D : public RotationBase,2> -{ - typedef RotationBase,2> Base; - -public: - - using Base::operator*; - - enum { Dim = 2 }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - typedef Matrix Vector2; - typedef Matrix Matrix2; - -protected: - - Scalar m_angle; - -public: - - /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */ - inline Rotation2D(Scalar a) : m_angle(a) {} - - /** \returns the rotation angle */ - inline Scalar angle() const { return m_angle; } - - /** \returns a read-write reference to the rotation angle */ - inline Scalar& angle() { return m_angle; } - - /** \returns the inverse rotation */ - inline Rotation2D inverse() const { return -m_angle; } - - /** Concatenates two rotations */ - inline Rotation2D operator*(const Rotation2D& other) const - { return m_angle + other.m_angle; } - - /** Concatenates two rotations */ - inline Rotation2D& operator*=(const Rotation2D& other) - { return m_angle += other.m_angle; return *this; } - - /** Applies the rotation to a 2D vector */ - Vector2 operator* (const Vector2& vec) const - { return toRotationMatrix() * vec; } - - template - Rotation2D& fromRotationMatrix(const MatrixBase& m); - Matrix2 toRotationMatrix(void) const; - - /** \returns the spherical interpolation between \c *this and \a other using - * parameter \a t. It is in fact equivalent to a linear interpolation. 
- */ - inline Rotation2D slerp(Scalar t, const Rotation2D& other) const - { return m_angle * (1-t) + other.angle() * t; } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Rotation2D(const Rotation2D& other) - { - m_angle = Scalar(other.angle()); - } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Rotation2D& other, typename NumTraits::Real prec = precision()) const - { return ei_isApprox(m_angle,other.m_angle, prec); } -}; - -/** \ingroup Geometry_Module - * single precision 2D rotation type */ -typedef Rotation2D Rotation2Df; -/** \ingroup Geometry_Module - * double precision 2D rotation type */ -typedef Rotation2D Rotation2Dd; - -/** Set \c *this from a 2x2 rotation matrix \a mat. - * In other words, this function extract the rotation angle - * from the rotation matrix. - */ -template -template -Rotation2D& Rotation2D::fromRotationMatrix(const MatrixBase& mat) -{ - EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE) - m_angle = ei_atan2(mat.coeff(1,0), mat.coeff(0,0)); - return *this; -} - -/** Constructs and \returns an equivalent 2x2 rotation matrix. 
- */ -template -typename Rotation2D::Matrix2 -Rotation2D::toRotationMatrix(void) const -{ - Scalar sinA = ei_sin(m_angle); - Scalar cosA = ei_cos(m_angle); - return (Matrix2() << cosA, -sinA, sinA, cosA).finished(); -} - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/RotationBase.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/RotationBase.h index b1c8f38da..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/RotationBase.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/RotationBase.h @@ -1,123 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -// this file aims to contains the various representations of rotation/orientation -// in 2D and 3D space excepted Matrix and Quaternion. 
- -/** \class RotationBase - * - * \brief Common base class for compact rotation representations - * - * \param Derived is the derived type, i.e., a rotation type - * \param _Dim the dimension of the space - */ -template -class RotationBase -{ - public: - enum { Dim = _Dim }; - /** the scalar type of the coefficients */ - typedef typename ei_traits::Scalar Scalar; - - /** corresponding linear transformation matrix type */ - typedef Matrix RotationMatrixType; - - inline const Derived& derived() const { return *static_cast(this); } - inline Derived& derived() { return *static_cast(this); } - - /** \returns an equivalent rotation matrix */ - inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); } - - /** \returns the inverse rotation */ - inline Derived inverse() const { return derived().inverse(); } - - /** \returns the concatenation of the rotation \c *this with a translation \a t */ - inline Transform operator*(const Translation& t) const - { return toRotationMatrix() * t; } - - /** \returns the concatenation of the rotation \c *this with a scaling \a s */ - inline RotationMatrixType operator*(const Scaling& s) const - { return toRotationMatrix() * s; } - - /** \returns the concatenation of the rotation \c *this with an affine transformation \a t */ - inline Transform operator*(const Transform& t) const - { return toRotationMatrix() * t; } -}; - -/** \geometry_module - * - * Constructs a Dim x Dim rotation matrix from the rotation \a r - */ -template -template -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> -::Matrix(const RotationBase& r) -{ - EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) - *this = r.toRotationMatrix(); -} - -/** \geometry_module - * - * Set a Dim x Dim rotation matrix from the rotation \a r - */ -template -template -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>& -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> 
-::operator=(const RotationBase& r) -{ - EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) - return *this = r.toRotationMatrix(); -} - -/** \internal - * - * Helper function to return an arbitrary rotation object to a rotation matrix. - * - * \param Scalar the numeric type of the matrix coefficients - * \param Dim the dimension of the current space - * - * It returns a Dim x Dim fixed size matrix. - * - * Default specializations are provided for: - * - any scalar type (2D), - * - any matrix expression, - * - any type based on RotationBase (e.g., Quaternion, AngleAxis, Rotation2D) - * - * Currently ei_toRotationMatrix is only used by Transform. - * - * \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis - */ -template -static inline Matrix ei_toRotationMatrix(const Scalar& s) -{ - EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE) - return Rotation2D(s).toRotationMatrix(); -} - -template -static inline Matrix ei_toRotationMatrix(const RotationBase& r) -{ - return r.toRotationMatrix(); -} - -template -static inline const MatrixBase& ei_toRotationMatrix(const MatrixBase& mat) -{ - EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim, - YOU_MADE_A_PROGRAMMING_MISTAKE) - return mat; -} - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Scaling.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Scaling.h index b8fa6cd3f..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Scaling.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Scaling.h @@ -1,167 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Scaling - * - * \brief Represents a possibly non uniform scaling transformation - * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * \param _Dim the dimension of the space, can be a compile time value or Dynamic - * - * \note This class is not aimed to be used to store a scaling transformation, - * but rather to make easier the constructions and updates of Transform objects. - * - * \sa class Translation, class Transform - */ -template -class Scaling -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim) - /** dimension of the space */ - enum { Dim = _Dim }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - /** corresponding vector type */ - typedef Matrix VectorType; - /** corresponding linear transformation matrix type */ - typedef Matrix LinearMatrixType; - /** corresponding translation type */ - typedef Translation TranslationType; - /** corresponding affine transformation type */ - typedef Transform TransformType; - -protected: - - VectorType m_coeffs; - -public: - - /** Default constructor without initialization. 
*/ - Scaling() {} - /** Constructs and initialize a uniform scaling transformation */ - explicit inline Scaling(const Scalar& s) { m_coeffs.setConstant(s); } - /** 2D only */ - inline Scaling(const Scalar& sx, const Scalar& sy) - { - ei_assert(Dim==2); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - } - /** 3D only */ - inline Scaling(const Scalar& sx, const Scalar& sy, const Scalar& sz) - { - ei_assert(Dim==3); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - m_coeffs.z() = sz; - } - /** Constructs and initialize the scaling transformation from a vector of scaling coefficients */ - explicit inline Scaling(const VectorType& coeffs) : m_coeffs(coeffs) {} - - const VectorType& coeffs() const { return m_coeffs; } - VectorType& coeffs() { return m_coeffs; } - - /** Concatenates two scaling */ - inline Scaling operator* (const Scaling& other) const - { return Scaling(coeffs().cwise() * other.coeffs()); } - - /** Concatenates a scaling and a translation */ - inline TransformType operator* (const TranslationType& t) const; - - /** Concatenates a scaling and an affine transformation */ - inline TransformType operator* (const TransformType& t) const; - - /** Concatenates a scaling and a linear transformation matrix */ - // TODO returns an expression - inline LinearMatrixType operator* (const LinearMatrixType& other) const - { return coeffs().asDiagonal() * other; } - - /** Concatenates a linear transformation matrix and a scaling */ - // TODO returns an expression - friend inline LinearMatrixType operator* (const LinearMatrixType& other, const Scaling& s) - { return other * s.coeffs().asDiagonal(); } - - template - inline LinearMatrixType operator*(const RotationBase& r) const - { return *this * r.toRotationMatrix(); } - - /** Applies scaling to vector */ - inline VectorType operator* (const VectorType& other) const - { return coeffs().asDiagonal() * other; } - - /** \returns the inverse scaling */ - inline Scaling inverse() const - { return Scaling(coeffs().cwise().inverse()); } - 
- inline Scaling& operator=(const Scaling& other) - { - m_coeffs = other.m_coeffs; - return *this; - } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Scaling(const Scaling& other) - { m_coeffs = other.coeffs().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. - * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Scaling& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -}; - -/** \addtogroup Geometry_Module */ -//@{ -typedef Scaling Scaling2f; -typedef Scaling Scaling2d; -typedef Scaling Scaling3f; -typedef Scaling Scaling3d; -//@} - -template -inline typename Scaling::TransformType -Scaling::operator* (const TranslationType& t) const -{ - TransformType res; - res.matrix().setZero(); - res.linear().diagonal() = coeffs(); - res.translation() = m_coeffs.cwise() * t.vector(); - res(Dim,Dim) = Scalar(1); - return res; -} - -template -inline typename Scaling::TransformType -Scaling::operator* (const TransformType& t) const -{ - TransformType res = t; - res.prescale(m_coeffs); - return res; -} - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Transform.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Transform.h index fab60b251..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Transform.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Transform.h @@ -1,786 +0,0 @@ -// This file is part of Eigen, a 
lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -// Note that we have to pass Dim and HDim because it is not allowed to use a template -// parameter to define a template specialization. To be more precise, in the following -// specializations, it is not allowed to use Dim+1 instead of HDim. -template< typename Other, - int Dim, - int HDim, - int OtherRows=Other::RowsAtCompileTime, - int OtherCols=Other::ColsAtCompileTime> -struct ei_transform_product_impl; - -/** \geometry_module \ingroup Geometry_Module - * - * \class Transform - * - * \brief Represents an homogeneous transformation in a N dimensional space - * - * \param _Scalar the scalar type, i.e., the type of the coefficients - * \param _Dim the dimension of the space - * - * The homography is internally represented and stored as a (Dim+1)^2 matrix which - * is available through the matrix() method. - * - * Conversion methods from/to Qt's QMatrix and QTransform are available if the - * preprocessor token EIGEN_QT_SUPPORT is defined. - * - * \sa class Matrix, class Quaternion - */ -template -class Transform -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim==Dynamic ? 
Dynamic : (_Dim+1)*(_Dim+1)) - enum { - Dim = _Dim, ///< space dimension in which the transformation holds - HDim = _Dim+1 ///< size of a respective homogeneous vector - }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - /** type of the matrix used to represent the transformation */ - typedef Matrix MatrixType; - /** type of the matrix used to represent the linear part of the transformation */ - typedef Matrix LinearMatrixType; - /** type of read/write reference to the linear part of the transformation */ - typedef Block LinearPart; - /** type of read/write reference to the linear part of the transformation */ - typedef const Block ConstLinearPart; - /** type of a vector */ - typedef Matrix VectorType; - /** type of a read/write reference to the translation part of the rotation */ - typedef Block TranslationPart; - /** type of a read/write reference to the translation part of the rotation */ - typedef const Block ConstTranslationPart; - /** corresponding translation type */ - typedef Translation TranslationType; - /** corresponding scaling transformation type */ - typedef Scaling ScalingType; - -protected: - - MatrixType m_matrix; - -public: - - /** Default constructor without initialization of the coefficients. 
*/ - inline Transform() { } - - inline Transform(const Transform& other) - { - m_matrix = other.m_matrix; - } - - inline explicit Transform(const TranslationType& t) { *this = t; } - inline explicit Transform(const ScalingType& s) { *this = s; } - template - inline explicit Transform(const RotationBase& r) { *this = r; } - - inline Transform& operator=(const Transform& other) - { m_matrix = other.m_matrix; return *this; } - - template // MSVC 2005 will commit suicide if BigMatrix has a default value - struct construct_from_matrix - { - static inline void run(Transform *transform, const MatrixBase& other) - { - transform->matrix() = other; - } - }; - - template struct construct_from_matrix - { - static inline void run(Transform *transform, const MatrixBase& other) - { - transform->linear() = other; - transform->translation().setZero(); - transform->matrix()(Dim,Dim) = Scalar(1); - transform->matrix().template block<1,Dim>(Dim,0).setZero(); - } - }; - - /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */ - template - inline explicit Transform(const MatrixBase& other) - { - construct_from_matrix::run(this, other); - } - - /** Set \c *this from a (Dim+1)^2 matrix. 
*/ - template - inline Transform& operator=(const MatrixBase& other) - { m_matrix = other; return *this; } - - #ifdef EIGEN_QT_SUPPORT - inline Transform(const QMatrix& other); - inline Transform& operator=(const QMatrix& other); - inline QMatrix toQMatrix(void) const; - inline Transform(const QTransform& other); - inline Transform& operator=(const QTransform& other); - inline QTransform toQTransform(void) const; - #endif - - /** shortcut for m_matrix(row,col); - * \sa MatrixBase::operaror(int,int) const */ - inline Scalar operator() (int row, int col) const { return m_matrix(row,col); } - /** shortcut for m_matrix(row,col); - * \sa MatrixBase::operaror(int,int) */ - inline Scalar& operator() (int row, int col) { return m_matrix(row,col); } - - /** \returns a read-only expression of the transformation matrix */ - inline const MatrixType& matrix() const { return m_matrix; } - /** \returns a writable expression of the transformation matrix */ - inline MatrixType& matrix() { return m_matrix; } - - /** \returns a read-only expression of the linear (linear) part of the transformation */ - inline ConstLinearPart linear() const { return m_matrix.template block(0,0); } - /** \returns a writable expression of the linear (linear) part of the transformation */ - inline LinearPart linear() { return m_matrix.template block(0,0); } - - /** \returns a read-only expression of the translation vector of the transformation */ - inline ConstTranslationPart translation() const { return m_matrix.template block(0,Dim); } - /** \returns a writable expression of the translation vector of the transformation */ - inline TranslationPart translation() { return m_matrix.template block(0,Dim); } - - /** \returns an expression of the product between the transform \c *this and a matrix expression \a other - * - * The right hand side \a other might be either: - * \li a vector of size Dim, - * \li an homogeneous vector of size Dim+1, - * \li a transformation matrix of size Dim+1 x Dim+1. 
- */ - // note: this function is defined here because some compilers cannot find the respective declaration - template - inline const typename ei_transform_product_impl::ResultType - operator * (const MatrixBase &other) const - { return ei_transform_product_impl::run(*this,other.derived()); } - - /** \returns the product expression of a transformation matrix \a a times a transform \a b - * The transformation matrix \a a must have a Dim+1 x Dim+1 sizes. */ - template - friend inline const typename ProductReturnType::Type - operator * (const MatrixBase &a, const Transform &b) - { return a.derived() * b.matrix(); } - - /** Contatenates two transformations */ - inline const Transform - operator * (const Transform& other) const - { return Transform(m_matrix * other.matrix()); } - - /** \sa MatrixBase::setIdentity() */ - void setIdentity() { m_matrix.setIdentity(); } - static const typename MatrixType::IdentityReturnType Identity() - { - return MatrixType::Identity(); - } - - template - inline Transform& scale(const MatrixBase &other); - - template - inline Transform& prescale(const MatrixBase &other); - - inline Transform& scale(Scalar s); - inline Transform& prescale(Scalar s); - - template - inline Transform& translate(const MatrixBase &other); - - template - inline Transform& pretranslate(const MatrixBase &other); - - template - inline Transform& rotate(const RotationType& rotation); - - template - inline Transform& prerotate(const RotationType& rotation); - - Transform& shear(Scalar sx, Scalar sy); - Transform& preshear(Scalar sx, Scalar sy); - - inline Transform& operator=(const TranslationType& t); - inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); } - inline Transform operator*(const TranslationType& t) const; - - inline Transform& operator=(const ScalingType& t); - inline Transform& operator*=(const ScalingType& s) { return scale(s.coeffs()); } - inline Transform operator*(const ScalingType& s) const; - friend inline 
Transform operator*(const LinearMatrixType& mat, const Transform& t) - { - Transform res = t; - res.matrix().row(Dim) = t.matrix().row(Dim); - res.matrix().template block(0,0) = (mat * t.matrix().template block(0,0)).lazy(); - return res; - } - - template - inline Transform& operator=(const RotationBase& r); - template - inline Transform& operator*=(const RotationBase& r) { return rotate(r.toRotationMatrix()); } - template - inline Transform operator*(const RotationBase& r) const; - - LinearMatrixType rotation() const; - template - void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const; - template - void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const; - - template - Transform& fromPositionOrientationScale(const MatrixBase &position, - const OrientationType& orientation, const MatrixBase &scale); - - inline const MatrixType inverse(TransformTraits traits = Affine) const; - - /** \returns a const pointer to the column major internal matrix */ - const Scalar* data() const { return m_matrix.data(); } - /** \returns a non-const pointer to the column major internal matrix */ - Scalar* data() { return m_matrix.data(); } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Transform(const Transform& other) - { m_matrix = other.matrix().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Transform& other, typename NumTraits::Real prec = precision()) const - { return m_matrix.isApprox(other.m_matrix, prec); } - - #ifdef EIGEN_TRANSFORM_PLUGIN - #include EIGEN_TRANSFORM_PLUGIN - #endif - -protected: - -}; - -/** \ingroup Geometry_Module */ -typedef Transform Transform2f; -/** \ingroup Geometry_Module */ -typedef Transform Transform3f; -/** \ingroup Geometry_Module */ -typedef Transform Transform2d; -/** \ingroup Geometry_Module */ -typedef Transform Transform3d; - -/************************** -*** Optional QT support *** -**************************/ - -#ifdef EIGEN_QT_SUPPORT -/** Initialises \c *this from a QMatrix assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -Transform::Transform(const QMatrix& other) -{ - *this = other; -} - -/** Set \c *this from a QMatrix assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -Transform& Transform::operator=(const QMatrix& other) -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - m_matrix << other.m11(), other.m21(), other.dx(), - other.m12(), other.m22(), other.dy(), - 0, 0, 1; - return *this; -} - -/** \returns a QMatrix from \c *this assuming the dimension is 2. - * - * \warning this convertion might loss data if \c *this is not affine - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -QMatrix Transform::toQMatrix(void) const -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - return QMatrix(m_matrix.coeff(0,0), m_matrix.coeff(1,0), - m_matrix.coeff(0,1), m_matrix.coeff(1,1), - m_matrix.coeff(0,2), m_matrix.coeff(1,2)); -} - -/** Initialises \c *this from a QTransform assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. 
- */ -template -Transform::Transform(const QTransform& other) -{ - *this = other; -} - -/** Set \c *this from a QTransform assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -Transform& Transform::operator=(const QTransform& other) -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - m_matrix << other.m11(), other.m21(), other.dx(), - other.m12(), other.m22(), other.dy(), - other.m13(), other.m23(), other.m33(); - return *this; -} - -/** \returns a QTransform from \c *this assuming the dimension is 2. - * - * This function is available only if the token EIGEN_QT_SUPPORT is defined. - */ -template -QTransform Transform::toQTransform(void) const -{ - EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(2,0), - m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(2,1), - m_matrix.coeff(0,2), m_matrix.coeff(1,2), m_matrix.coeff(2,2)); -} -#endif - -/********************* -*** Procedural API *** -*********************/ - -/** Applies on the right the non uniform scale transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. - * \sa prescale() - */ -template -template -Transform& -Transform::scale(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - linear() = (linear() * other.asDiagonal()).lazy(); - return *this; -} - -/** Applies on the right a uniform scale of a factor \a c to \c *this - * and returns a reference to \c *this. - * \sa prescale(Scalar) - */ -template -inline Transform& Transform::scale(Scalar s) -{ - linear() *= s; - return *this; -} - -/** Applies on the left the non uniform scale transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. 
- * \sa scale() - */ -template -template -Transform& -Transform::prescale(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - m_matrix.template block(0,0) = (other.asDiagonal() * m_matrix.template block(0,0)).lazy(); - return *this; -} - -/** Applies on the left a uniform scale of a factor \a c to \c *this - * and returns a reference to \c *this. - * \sa scale(Scalar) - */ -template -inline Transform& Transform::prescale(Scalar s) -{ - m_matrix.template corner(TopLeft) *= s; - return *this; -} - -/** Applies on the right the translation matrix represented by the vector \a other - * to \c *this and returns a reference to \c *this. - * \sa pretranslate() - */ -template -template -Transform& -Transform::translate(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - translation() += linear() * other; - return *this; -} - -/** Applies on the left the translation matrix represented by the vector \a other - * to \c *this and returns a reference to \c *this. - * \sa translate() - */ -template -template -Transform& -Transform::pretranslate(const MatrixBase &other) -{ - EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - translation() += other; - return *this; -} - -/** Applies on the right the rotation represented by the rotation \a rotation - * to \c *this and returns a reference to \c *this. - * - * The template parameter \a RotationType is the type of the rotation which - * must be known by ei_toRotationMatrix<>. - * - * Natively supported types includes: - * - any scalar (2D), - * - a Dim x Dim matrix expression, - * - a Quaternion (3D), - * - a AngleAxis (3D) - * - * This mechanism is easily extendable to support user types such as Euler angles, - * or a pair of Quaternion for 4D rotations. 
- * - * \sa rotate(Scalar), class Quaternion, class AngleAxis, prerotate(RotationType) - */ -template -template -Transform& -Transform::rotate(const RotationType& rotation) -{ - linear() *= ei_toRotationMatrix(rotation); - return *this; -} - -/** Applies on the left the rotation represented by the rotation \a rotation - * to \c *this and returns a reference to \c *this. - * - * See rotate() for further details. - * - * \sa rotate() - */ -template -template -Transform& -Transform::prerotate(const RotationType& rotation) -{ - m_matrix.template block(0,0) = ei_toRotationMatrix(rotation) - * m_matrix.template block(0,0); - return *this; -} - -/** Applies on the right the shear transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. - * \warning 2D only. - * \sa preshear() - */ -template -Transform& -Transform::shear(Scalar sx, Scalar sy) -{ - EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - VectorType tmp = linear().col(0)*sy + linear().col(1); - linear() << linear().col(0) + linear().col(1)*sx, tmp; - return *this; -} - -/** Applies on the left the shear transformation represented - * by the vector \a other to \c *this and returns a reference to \c *this. - * \warning 2D only. 
- * \sa shear() - */ -template -Transform& -Transform::preshear(Scalar sx, Scalar sy) -{ - EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - m_matrix.template block(0,0) = LinearMatrixType(1, sx, sy, 1) * m_matrix.template block(0,0); - return *this; -} - -/****************************************************** -*** Scaling, Translation and Rotation compatibility *** -******************************************************/ - -template -inline Transform& Transform::operator=(const TranslationType& t) -{ - linear().setIdentity(); - translation() = t.vector(); - m_matrix.template block<1,Dim>(Dim,0).setZero(); - m_matrix(Dim,Dim) = Scalar(1); - return *this; -} - -template -inline Transform Transform::operator*(const TranslationType& t) const -{ - Transform res = *this; - res.translate(t.vector()); - return res; -} - -template -inline Transform& Transform::operator=(const ScalingType& s) -{ - m_matrix.setZero(); - linear().diagonal() = s.coeffs(); - m_matrix.coeffRef(Dim,Dim) = Scalar(1); - return *this; -} - -template -inline Transform Transform::operator*(const ScalingType& s) const -{ - Transform res = *this; - res.scale(s.coeffs()); - return res; -} - -template -template -inline Transform& Transform::operator=(const RotationBase& r) -{ - linear() = ei_toRotationMatrix(r); - translation().setZero(); - m_matrix.template block<1,Dim>(Dim,0).setZero(); - m_matrix.coeffRef(Dim,Dim) = Scalar(1); - return *this; -} - -template -template -inline Transform Transform::operator*(const RotationBase& r) const -{ - Transform res = *this; - res.rotate(r.derived()); - return res; -} - -/************************ -*** Special functions *** -************************/ - -/** \returns the rotation part of the transformation - * \nonstableyet - * - * \svd_module - * - * \sa computeRotationScaling(), computeScalingRotation(), class SVD - */ -template -typename Transform::LinearMatrixType -Transform::rotation() const -{ - LinearMatrixType result; - 
computeRotationScaling(&result, (LinearMatrixType*)0); - return result; -} - - -/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being - * not necessarily positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * \nonstableyet - * - * \svd_module - * - * \sa computeScalingRotation(), rotation(), class SVD - */ -template -template -void Transform::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const -{ - JacobiSVD svd(linear(), ComputeFullU|ComputeFullV); - Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(svd.singularValues()); - sv.coeffRef(0) *= x; - if(scaling) - { - scaling->noalias() = svd.matrixV() * sv.asDiagonal() * svd.matrixV().adjoint(); - } - if(rotation) - { - LinearMatrixType m(svd.matrixU()); - m.col(0) /= x; - rotation->noalias() = m * svd.matrixV().adjoint(); - } -} - -/** decomposes the linear part of the transformation as a product rotation x scaling, the scaling being - * not necessarily positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * \nonstableyet - * - * \svd_module - * - * \sa computeRotationScaling(), rotation(), class SVD - */ -template -template -void Transform::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const -{ - JacobiSVD svd(linear(), ComputeFullU|ComputeFullV); - Scalar x = (svd.matrixU() * svd.matrixV().adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(svd.singularValues()); - sv.coeffRef(0) *= x; - if(scaling) - { - scaling->noalias() = svd.matrixU() * sv.asDiagonal() * svd.matrixU().adjoint(); - } - if(rotation) - { - LinearMatrixType m(svd.matrixU()); - m.col(0) /= x; - rotation->noalias() = m * svd.matrixV().adjoint(); - } -} - -/** Convenient method to set \c *this from a position, orientation and scale - * of a 3D object. 
- */ -template -template -Transform& -Transform::fromPositionOrientationScale(const MatrixBase &position, - const OrientationType& orientation, const MatrixBase &scale) -{ - linear() = ei_toRotationMatrix(orientation); - linear() *= scale.asDiagonal(); - translation() = position; - m_matrix.template block<1,Dim>(Dim,0).setZero(); - m_matrix(Dim,Dim) = Scalar(1); - return *this; -} - -/** \nonstableyet - * - * \returns the inverse transformation matrix according to some given knowledge - * on \c *this. - * - * \param traits allows to optimize the inversion process when the transformion - * is known to be not a general transformation. The possible values are: - * - Projective if the transformation is not necessarily affine, i.e., if the - * last row is not guaranteed to be [0 ... 0 1] - * - Affine is the default, the last row is assumed to be [0 ... 0 1] - * - Isometry if the transformation is only a concatenations of translations - * and rotations. - * - * \warning unless \a traits is always set to NoShear or NoScaling, this function - * requires the generic inverse method of MatrixBase defined in the LU module. If - * you forget to include this module, then you will get hard to debug linking errors. 
- * - * \sa MatrixBase::inverse() - */ -template -inline const typename Transform::MatrixType -Transform::inverse(TransformTraits traits) const -{ - if (traits == Projective) - { - return m_matrix.inverse(); - } - else - { - MatrixType res; - if (traits == Affine) - { - res.template corner(TopLeft) = linear().inverse(); - } - else if (traits == Isometry) - { - res.template corner(TopLeft) = linear().transpose(); - } - else - { - ei_assert("invalid traits value in Transform::inverse()"); - } - // translation and remaining parts - res.template corner(TopRight) = - res.template corner(TopLeft) * translation(); - res.template corner<1,Dim>(BottomLeft).setZero(); - res.coeffRef(Dim,Dim) = Scalar(1); - return res; - } -} - -/***************************************************** -*** Specializations of operator* with a MatrixBase *** -*****************************************************/ - -template -struct ei_transform_product_impl -{ - typedef Transform TransformType; - typedef typename TransformType::MatrixType MatrixType; - typedef typename ProductReturnType::Type ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { return tr.matrix() * other; } -}; - -template -struct ei_transform_product_impl -{ - typedef Transform TransformType; - typedef typename TransformType::MatrixType MatrixType; - typedef TransformType ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { - TransformType res; - res.translation() = tr.translation(); - res.matrix().row(Dim) = tr.matrix().row(Dim); - res.linear() = (tr.linear() * other).lazy(); - return res; - } -}; - -template -struct ei_transform_product_impl -{ - typedef Transform TransformType; - typedef typename TransformType::MatrixType MatrixType; - typedef typename ProductReturnType::Type ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { return tr.matrix() * other; } -}; - -template -struct ei_transform_product_impl -{ - typedef typename 
Other::Scalar Scalar; - typedef Transform TransformType; - typedef Matrix ResultType; - static ResultType run(const TransformType& tr, const Other& other) - { return ((tr.linear() * other) + tr.translation()) - * (Scalar(1) / ( (tr.matrix().template block<1,Dim>(Dim,0) * other).coeff(0) + tr.matrix().coeff(Dim,Dim))); } -}; - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Translation.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Translation.h index 2b9859f6f..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Translation.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Geometry/Translation.h @@ -1,184 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// no include guard, we'll include this twice from All.h from Eigen2Support, and it's internal anyway - -namespace Eigen { - -/** \geometry_module \ingroup Geometry_Module - * - * \class Translation - * - * \brief Represents a translation transformation - * - * \param _Scalar the scalar type, i.e., the type of the coefficients. - * \param _Dim the dimension of the space, can be a compile time value or Dynamic - * - * \note This class is not aimed to be used to store a translation transformation, - * but rather to make easier the constructions and updates of Transform objects. 
- * - * \sa class Scaling, class Transform - */ -template -class Translation -{ -public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_Dim) - /** dimension of the space */ - enum { Dim = _Dim }; - /** the scalar type of the coefficients */ - typedef _Scalar Scalar; - /** corresponding vector type */ - typedef Matrix VectorType; - /** corresponding linear transformation matrix type */ - typedef Matrix LinearMatrixType; - /** corresponding scaling transformation type */ - typedef Scaling ScalingType; - /** corresponding affine transformation type */ - typedef Transform TransformType; - -protected: - - VectorType m_coeffs; - -public: - - /** Default constructor without initialization. */ - Translation() {} - /** */ - inline Translation(const Scalar& sx, const Scalar& sy) - { - ei_assert(Dim==2); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - } - /** */ - inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz) - { - ei_assert(Dim==3); - m_coeffs.x() = sx; - m_coeffs.y() = sy; - m_coeffs.z() = sz; - } - /** Constructs and initialize the scaling transformation from a vector of scaling coefficients */ - explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {} - - const VectorType& vector() const { return m_coeffs; } - VectorType& vector() { return m_coeffs; } - - /** Concatenates two translation */ - inline Translation operator* (const Translation& other) const - { return Translation(m_coeffs + other.m_coeffs); } - - /** Concatenates a translation and a scaling */ - inline TransformType operator* (const ScalingType& other) const; - - /** Concatenates a translation and a linear transformation */ - inline TransformType operator* (const LinearMatrixType& linear) const; - - template - inline TransformType operator*(const RotationBase& r) const - { return *this * r.toRotationMatrix(); } - - /** Concatenates a linear transformation and a translation */ - // its a nightmare to define a templated friend function outside its 
declaration - friend inline TransformType operator* (const LinearMatrixType& linear, const Translation& t) - { - TransformType res; - res.matrix().setZero(); - res.linear() = linear; - res.translation() = linear * t.m_coeffs; - res.matrix().row(Dim).setZero(); - res(Dim,Dim) = Scalar(1); - return res; - } - - /** Concatenates a translation and an affine transformation */ - inline TransformType operator* (const TransformType& t) const; - - /** Applies translation to vector */ - inline VectorType operator* (const VectorType& other) const - { return m_coeffs + other; } - - /** \returns the inverse translation (opposite) */ - Translation inverse() const { return Translation(-m_coeffs); } - - Translation& operator=(const Translation& other) - { - m_coeffs = other.m_coeffs; - return *this; - } - - /** \returns \c *this with scalar type casted to \a NewScalarType - * - * Note that if \a NewScalarType is equal to the current scalar type of \c *this - * then this function smartly returns a const reference to \c *this. - */ - template - inline typename internal::cast_return_type >::type cast() const - { return typename internal::cast_return_type >::type(*this); } - - /** Copy constructor with scalar type conversion */ - template - inline explicit Translation(const Translation& other) - { m_coeffs = other.vector().template cast(); } - - /** \returns \c true if \c *this is approximately equal to \a other, within the precision - * determined by \a prec. 
- * - * \sa MatrixBase::isApprox() */ - bool isApprox(const Translation& other, typename NumTraits::Real prec = precision()) const - { return m_coeffs.isApprox(other.m_coeffs, prec); } - -}; - -/** \addtogroup Geometry_Module */ -//@{ -typedef Translation Translation2f; -typedef Translation Translation2d; -typedef Translation Translation3f; -typedef Translation Translation3d; -//@} - - -template -inline typename Translation::TransformType -Translation::operator* (const ScalingType& other) const -{ - TransformType res; - res.matrix().setZero(); - res.linear().diagonal() = other.coeffs(); - res.translation() = m_coeffs; - res(Dim,Dim) = Scalar(1); - return res; -} - -template -inline typename Translation::TransformType -Translation::operator* (const LinearMatrixType& linear) const -{ - TransformType res; - res.matrix().setZero(); - res.linear() = linear; - res.translation() = m_coeffs; - res.matrix().row(Dim).setZero(); - res(Dim,Dim) = Scalar(1); - return res; -} - -template -inline typename Translation::TransformType -Translation::operator* (const TransformType& t) const -{ - TransformType res = t; - res.pretranslate(m_coeffs); - return res; -} - -} // end namespace Eigen diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LU.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LU.h index 49f19ad76..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LU.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LU.h @@ -1,120 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2_LU_H -#define EIGEN2_LU_H - -namespace Eigen { - -template -class LU : public FullPivLU -{ - public: - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix IntRowVectorType; - typedef Matrix IntColVectorType; - typedef Matrix RowVectorType; - typedef Matrix ColVectorType; - - typedef Matrix KernelResultType; - - typedef Matrix ImageResultType; - - typedef FullPivLU Base; - - template - explicit LU(const T& t) : Base(t), m_originalMatrix(t) {} - - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = static_cast(this)->solve(b); - return true; - } - - template - inline void computeInverse(ResultType *result) const - { - solve(MatrixType::Identity(this->rows(), this->cols()), result); - } - - template - void computeKernel(KernelMatrixType *result) const - { - *result = static_cast(this)->kernel(); - } - - template - void computeImage(ImageMatrixType *result) const - { - *result = static_cast(this)->image(m_originalMatrix); - } - - const ImageResultType image() const - { - return static_cast(this)->image(m_originalMatrix); - } - - const MatrixType& m_originalMatrix; -}; - -#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS -/** \lu_module - * - * Synonym of partialPivLu(). - * - * \return the partial-pivoting LU decomposition of \c *this. - * - * \sa class PartialPivLU - */ -template -inline const LU::PlainObject> -MatrixBase::lu() const -{ - return LU(eval()); -} -#endif - -#ifdef EIGEN2_SUPPORT -/** \lu_module - * - * Synonym of partialPivLu(). - * - * \return the partial-pivoting LU decomposition of \c *this. 
- * - * \sa class PartialPivLU - */ -template -inline const LU::PlainObject> -MatrixBase::eigen2_lu() const -{ - return LU(eval()); -} -#endif - -} // end namespace Eigen - -#endif // EIGEN2_LU_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Lazy.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Lazy.h index 593fc78e6..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Lazy.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Lazy.h @@ -1,71 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_LAZY_H -#define EIGEN_LAZY_H - -namespace Eigen { - -/** \deprecated it is only used by lazy() which is deprecated - * - * \returns an expression of *this with added flags - * - * Example: \include MatrixBase_marked.cpp - * Output: \verbinclude MatrixBase_marked.out - * - * \sa class Flagged, extract(), part() - */ -template -template -inline const Flagged -MatrixBase::marked() const -{ - return derived(); -} - -/** \deprecated use MatrixBase::noalias() - * - * \returns an expression of *this with the EvalBeforeAssigningBit flag removed. 
- * - * Example: \include MatrixBase_lazy.cpp - * Output: \verbinclude MatrixBase_lazy.out - * - * \sa class Flagged, marked() - */ -template -inline const Flagged -MatrixBase::lazy() const -{ - return derived(); -} - - -/** \internal - * Overloaded to perform an efficient C += (A*B).lazy() */ -template -template -Derived& MatrixBase::operator+=(const Flagged, 0, - EvalBeforeAssigningBit>& other) -{ - other._expression().derived().addTo(derived()); return derived(); -} - -/** \internal - * Overloaded to perform an efficient C -= (A*B).lazy() */ -template -template -Derived& MatrixBase::operator-=(const Flagged, 0, - EvalBeforeAssigningBit>& other) -{ - other._expression().derived().subTo(derived()); return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_LAZY_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LeastSquares.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LeastSquares.h index 7992d4944..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LeastSquares.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/LeastSquares.h @@ -1,169 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_LEASTSQUARES_H -#define EIGEN2_LEASTSQUARES_H - -namespace Eigen { - -/** \ingroup LeastSquares_Module - * - * \leastsquares_module - * - * For a set of points, this function tries to express - * one of the coords as a linear (affine) function of the other coords. - * - * This is best explained by an example. 
This function works in full - * generality, for points in a space of arbitrary dimension, and also over - * the complex numbers, but for this example we will work in dimension 3 - * over the real numbers (doubles). - * - * So let us work with the following set of 5 points given by their - * \f$(x,y,z)\f$ coordinates: - * @code - Vector3d points[5]; - points[0] = Vector3d( 3.02, 6.89, -4.32 ); - points[1] = Vector3d( 2.01, 5.39, -3.79 ); - points[2] = Vector3d( 2.41, 6.01, -4.01 ); - points[3] = Vector3d( 2.09, 5.55, -3.86 ); - points[4] = Vector3d( 2.58, 6.32, -4.10 ); - * @endcode - * Suppose that we want to express the second coordinate (\f$y\f$) as a linear - * expression in \f$x\f$ and \f$z\f$, that is, - * \f[ y=ax+bz+c \f] - * for some constants \f$a,b,c\f$. Thus, we want to find the best possible - * constants \f$a,b,c\f$ so that the plane of equation \f$y=ax+bz+c\f$ fits - * best the five above points. To do that, call this function as follows: - * @code - Vector3d coeffs; // will store the coefficients a, b, c - linearRegression( - 5, - &points, - &coeffs, - 1 // the coord to express as a function of - // the other ones. 0 means x, 1 means y, 2 means z. - ); - * @endcode - * Now the vector \a coeffs is approximately - * \f$( 0.495 , -1.927 , -2.906 )\f$. - * Thus, we get \f$a=0.495, b = -1.927, c = -2.906\f$. Let us check for - * instance how near points[0] is from the plane of equation \f$y=ax+bz+c\f$. - * Looking at the coords of points[0], we see that: - * \f[ax+bz+c = 0.495 * 3.02 + (-1.927) * (-4.32) + (-2.906) = 6.91.\f] - * On the other hand, we have \f$y=6.89\f$. We see that the values - * \f$6.91\f$ and \f$6.89\f$ - * are near, so points[0] is very near the plane of equation \f$y=ax+bz+c\f$. - * - * Let's now describe precisely the parameters: - * @param numPoints the number of points - * @param points the array of pointers to the points on which to perform the linear regression - * @param result pointer to the vector in which to store the result. 
- This vector must be of the same type and size as the - data points. The meaning of its coords is as follows. - For brevity, let \f$n=Size\f$, - \f$r_i=result[i]\f$, - and \f$f=funcOfOthers\f$. Denote by - \f$x_0,\ldots,x_{n-1}\f$ - the n coordinates in the n-dimensional space. - Then the resulting equation is: - \f[ x_f = r_0 x_0 + \cdots + r_{f-1}x_{f-1} - + r_{f+1}x_{f+1} + \cdots + r_{n-1}x_{n-1} + r_n. \f] - * @param funcOfOthers Determines which coord to express as a function of the - others. Coords are numbered starting from 0, so that a - value of 0 means \f$x\f$, 1 means \f$y\f$, - 2 means \f$z\f$, ... - * - * \sa fitHyperplane() - */ -template -void linearRegression(int numPoints, - VectorType **points, - VectorType *result, - int funcOfOthers ) -{ - typedef typename VectorType::Scalar Scalar; - typedef Hyperplane HyperplaneType; - const int size = points[0]->size(); - result->resize(size); - HyperplaneType h(size); - fitHyperplane(numPoints, points, &h); - for(int i = 0; i < funcOfOthers; i++) - result->coeffRef(i) = - h.coeffs()[i] / h.coeffs()[funcOfOthers]; - for(int i = funcOfOthers; i < size; i++) - result->coeffRef(i) = - h.coeffs()[i+1] / h.coeffs()[funcOfOthers]; -} - -/** \ingroup LeastSquares_Module - * - * \leastsquares_module - * - * This function is quite similar to linearRegression(), so we refer to the - * documentation of this function and only list here the differences. - * - * The main difference from linearRegression() is that this function doesn't - * take a \a funcOfOthers argument. Instead, it finds a general equation - * of the form - * \f[ r_0 x_0 + \cdots + r_{n-1}x_{n-1} + r_n = 0, \f] - * where \f$n=Size\f$, \f$r_i=retCoefficients[i]\f$, and we denote by - * \f$x_0,\ldots,x_{n-1}\f$ the n coordinates in the n-dimensional space. - * - * Thus, the vector \a retCoefficients has size \f$n+1\f$, which is another - * difference from linearRegression(). 
- * - * In practice, this function performs an hyper-plane fit in a total least square sense - * via the following steps: - * 1 - center the data to the mean - * 2 - compute the covariance matrix - * 3 - pick the eigenvector corresponding to the smallest eigenvalue of the covariance matrix - * The ratio of the smallest eigenvalue and the second one gives us a hint about the relevance - * of the solution. This value is optionally returned in \a soundness. - * - * \sa linearRegression() - */ -template -void fitHyperplane(int numPoints, - VectorType **points, - HyperplaneType *result, - typename NumTraits::Real* soundness = 0) -{ - typedef typename VectorType::Scalar Scalar; - typedef Matrix CovMatrixType; - EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType) - ei_assert(numPoints >= 1); - int size = points[0]->size(); - ei_assert(size+1 == result->coeffs().size()); - - // compute the mean of the data - VectorType mean = VectorType::Zero(size); - for(int i = 0; i < numPoints; ++i) - mean += *(points[i]); - mean /= numPoints; - - // compute the covariance matrix - CovMatrixType covMat = CovMatrixType::Zero(size, size); - for(int i = 0; i < numPoints; ++i) - { - VectorType diff = (*(points[i]) - mean).conjugate(); - covMat += diff * diff.adjoint(); - } - - // now we just have to pick the eigen vector with smallest eigen value - SelfAdjointEigenSolver eig(covMat); - result->normal() = eig.eigenvectors().col(0); - if (soundness) - *soundness = eig.eigenvalues().coeff(0)/eig.eigenvalues().coeff(1); - - // let's compute the constant coefficient such that the - // plane pass trough the mean point: - result->offset() = - (result->normal().cwise()* mean).sum(); -} - -} // end namespace Eigen - -#endif // EIGEN2_LEASTSQUARES_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Macros.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Macros.h index 351c32afb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Macros.h +++ 
b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Macros.h @@ -1,20 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_MACROS_H -#define EIGEN2_MACROS_H - -#define ei_assert eigen_assert -#define ei_internal_assert eigen_internal_assert - -#define EIGEN_ALIGN_128 EIGEN_ALIGN16 - -#define EIGEN_ARCH_WANTS_ALIGNMENT EIGEN_ALIGN_STATICALLY - -#endif // EIGEN2_MACROS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/MathFunctions.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/MathFunctions.h index 3544af253..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/MathFunctions.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/MathFunctions.h @@ -1,57 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2_MATH_FUNCTIONS_H -#define EIGEN2_MATH_FUNCTIONS_H - -namespace Eigen { - -template inline typename NumTraits::Real ei_real(const T& x) { return numext::real(x); } -template inline typename NumTraits::Real ei_imag(const T& x) { return numext::imag(x); } -template inline T ei_conj(const T& x) { return numext::conj(x); } -template inline typename NumTraits::Real ei_abs (const T& x) { using std::abs; return abs(x); } -template inline typename NumTraits::Real ei_abs2(const T& x) { return numext::abs2(x); } -template inline T ei_sqrt(const T& x) { using std::sqrt; return sqrt(x); } -template inline T ei_exp (const T& x) { using std::exp; return exp(x); } -template inline T ei_log (const T& x) { using std::log; return log(x); } -template inline T ei_sin (const T& x) { using std::sin; return sin(x); } -template inline T ei_cos (const T& x) { using std::cos; return cos(x); } -template inline T ei_atan2(const T& x,const T& y) { using std::atan2; return atan2(x,y); } -template inline T ei_pow (const T& x,const T& y) { return numext::pow(x,y); } -template inline T ei_random () { return internal::random(); } -template inline T ei_random (const T& x, const T& y) { return internal::random(x, y); } - -template inline T precision () { return NumTraits::dummy_precision(); } -template inline T machine_epsilon () { return NumTraits::epsilon(); } - - -template -inline bool ei_isMuchSmallerThan(const Scalar& x, const OtherScalar& y, - typename NumTraits::Real precision = NumTraits::dummy_precision()) -{ - return internal::isMuchSmallerThan(x, y, precision); -} - -template -inline bool ei_isApprox(const Scalar& x, const Scalar& y, - typename NumTraits::Real precision = NumTraits::dummy_precision()) -{ - return internal::isApprox(x, y, precision); -} - -template -inline bool ei_isApproxOrLessThan(const Scalar& x, const Scalar& y, - typename NumTraits::Real precision = NumTraits::dummy_precision()) -{ - return internal::isApproxOrLessThan(x, y, precision); -} - -} // end 
namespace Eigen - -#endif // EIGEN2_MATH_FUNCTIONS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Memory.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Memory.h index f86372b6b..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Memory.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Memory.h @@ -1,45 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_MEMORY_H -#define EIGEN2_MEMORY_H - -namespace Eigen { - -inline void* ei_aligned_malloc(size_t size) { return internal::aligned_malloc(size); } -inline void ei_aligned_free(void *ptr) { internal::aligned_free(ptr); } -inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size) { return internal::aligned_realloc(ptr, new_size, old_size); } -inline void* ei_handmade_aligned_malloc(size_t size) { return internal::handmade_aligned_malloc(size); } -inline void ei_handmade_aligned_free(void *ptr) { internal::handmade_aligned_free(ptr); } - -template inline void* ei_conditional_aligned_malloc(size_t size) -{ - return internal::conditional_aligned_malloc(size); -} -template inline void ei_conditional_aligned_free(void *ptr) -{ - internal::conditional_aligned_free(ptr); -} -template inline void* ei_conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size) -{ - return internal::conditional_aligned_realloc(ptr, new_size, old_size); -} - -template inline T* ei_aligned_new(size_t size) -{ - return internal::aligned_new(size); -} -template inline void ei_aligned_delete(T *ptr, size_t size) -{ - return internal::aligned_delete(ptr, size); -} - -} // end namespace Eigen - -#endif // EIGEN2_MACROS_H diff --git 
a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Meta.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Meta.h index fa37cfc96..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Meta.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Meta.h @@ -1,75 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_META_H -#define EIGEN2_META_H - -namespace Eigen { - -template -struct ei_traits : internal::traits -{}; - -struct ei_meta_true { enum { ret = 1 }; }; -struct ei_meta_false { enum { ret = 0 }; }; - -template -struct ei_meta_if { typedef Then ret; }; - -template -struct ei_meta_if { typedef Else ret; }; - -template struct ei_is_same_type { enum { ret = 0 }; }; -template struct ei_is_same_type { enum { ret = 1 }; }; - -template struct ei_unref { typedef T type; }; -template struct ei_unref { typedef T type; }; - -template struct ei_unpointer { typedef T type; }; -template struct ei_unpointer { typedef T type; }; -template struct ei_unpointer { typedef T type; }; - -template struct ei_unconst { typedef T type; }; -template struct ei_unconst { typedef T type; }; -template struct ei_unconst { typedef T & type; }; -template struct ei_unconst { typedef T * type; }; - -template struct ei_cleantype { typedef T type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; -template struct ei_cleantype { typedef typename ei_cleantype::type type; }; - -/** \internal In short, it computes 
int(sqrt(\a Y)) with \a Y an integer. - * Usage example: \code ei_meta_sqrt<1023>::ret \endcode - */ -template Y))) > - // use ?: instead of || just to shut up a stupid gcc 4.3 warning -class ei_meta_sqrt -{ - enum { - MidX = (InfX+SupX)/2, - TakeInf = MidX*MidX > Y ? 1 : 0, - NewInf = int(TakeInf) ? InfX : int(MidX), - NewSup = int(TakeInf) ? int(MidX) : SupX - }; - public: - enum { ret = ei_meta_sqrt::ret }; -}; - -template -class ei_meta_sqrt { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; }; - -} // end namespace Eigen - -#endif // EIGEN2_META_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Minor.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Minor.h index 4cded5734..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Minor.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/Minor.h @@ -1,117 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2006-2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MINOR_H -#define EIGEN_MINOR_H - -namespace Eigen { - -/** - * \class Minor - * - * \brief Expression of a minor - * - * \param MatrixType the type of the object in which we are taking a minor - * - * This class represents an expression of a minor. It is the return - * type of MatrixBase::minor() and most of the time this is the only way it - * is used. - * - * \sa MatrixBase::minor() - */ - -namespace internal { -template -struct traits > - : traits -{ - typedef typename nested::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; - typedef typename MatrixType::StorageKind StorageKind; - enum { - RowsAtCompileTime = (MatrixType::RowsAtCompileTime != Dynamic) ? 
- int(MatrixType::RowsAtCompileTime) - 1 : Dynamic, - ColsAtCompileTime = (MatrixType::ColsAtCompileTime != Dynamic) ? - int(MatrixType::ColsAtCompileTime) - 1 : Dynamic, - MaxRowsAtCompileTime = (MatrixType::MaxRowsAtCompileTime != Dynamic) ? - int(MatrixType::MaxRowsAtCompileTime) - 1 : Dynamic, - MaxColsAtCompileTime = (MatrixType::MaxColsAtCompileTime != Dynamic) ? - int(MatrixType::MaxColsAtCompileTime) - 1 : Dynamic, - Flags = _MatrixTypeNested::Flags & (HereditaryBits | LvalueBit), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost // minor is used typically on tiny matrices, - // where loops are unrolled and the 'if' evaluates at compile time - }; -}; -} - -template class Minor - : public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Minor) - - inline Minor(const MatrixType& matrix, - Index row, Index col) - : m_matrix(matrix), m_row(row), m_col(col) - { - eigen_assert(row >= 0 && row < matrix.rows() - && col >= 0 && col < matrix.cols()); - } - - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Minor) - - inline Index rows() const { return m_matrix.rows() - 1; } - inline Index cols() const { return m_matrix.cols() - 1; } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(row + (row >= m_row), col + (col >= m_col)); - } - - inline const Scalar coeff(Index row, Index col) const - { - return m_matrix.coeff(row + (row >= m_row), col + (col >= m_col)); - } - - protected: - const typename MatrixType::Nested m_matrix; - const Index m_row, m_col; -}; - -/** - * \return an expression of the (\a row, \a col)-minor of *this, - * i.e. an expression constructed from *this by removing the specified - * row and column. - * - * Example: \include MatrixBase_minor.cpp - * Output: \verbinclude MatrixBase_minor.out - * - * \sa class Minor - */ -template -inline Minor -MatrixBase::minor(Index row, Index col) -{ - return Minor(derived(), row, col); -} - -/** - * This is the const version of minor(). 
*/ -template -inline const Minor -MatrixBase::minor(Index row, Index col) const -{ - return Minor(derived(), row, col); -} - -} // end namespace Eigen - -#endif // EIGEN_MINOR_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/QR.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/QR.h index 2042c9851..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/QR.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/QR.h @@ -1,67 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2011 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_QR_H -#define EIGEN2_QR_H - -namespace Eigen { - -template -class QR : public HouseholderQR -{ - public: - - typedef HouseholderQR Base; - typedef Block MatrixRBlockType; - - QR() : Base() {} - - template - explicit QR(const T& t) : Base(t) {} - - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = static_cast(this)->solve(b); - return true; - } - - MatrixType matrixQ(void) const { - MatrixType ret = MatrixType::Identity(this->rows(), this->cols()); - ret = this->householderQ() * ret; - return ret; - } - - bool isFullRank() const { - return true; - } - - const TriangularView - matrixR(void) const - { - int cols = this->cols(); - return MatrixRBlockType(this->matrixQR(), 0, 0, cols, cols).template triangularView(); - } -}; - -/** \return the QR decomposition of \c *this. 
- * - * \sa class QR - */ -template -const QR::PlainObject> -MatrixBase::qr() const -{ - return QR(eval()); -} - -} // end namespace Eigen - -#endif // EIGEN2_QR_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/SVD.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/SVD.h index 3d03d2288..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/SVD.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/SVD.h @@ -1,637 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2_SVD_H -#define EIGEN2_SVD_H - -namespace Eigen { - -/** \ingroup SVD_Module - * \nonstableyet - * - * \class SVD - * - * \brief Standard SVD decomposition of a matrix and associated features - * - * \param MatrixType the type of the matrix of which we are computing the SVD decomposition - * - * This class performs a standard SVD decomposition of a real matrix A of size \c M x \c N - * with \c M \>= \c N. 
- * - * - * \sa MatrixBase::SVD() - */ -template class SVD -{ - private: - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - - enum { - PacketSize = internal::packet_traits::size, - AlignmentMask = int(PacketSize)-1, - MinSize = EIGEN_SIZE_MIN_PREFER_DYNAMIC(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime) - }; - - typedef Matrix ColVector; - typedef Matrix RowVector; - - typedef Matrix MatrixUType; - typedef Matrix MatrixVType; - typedef Matrix SingularValuesType; - - public: - - SVD() {} // a user who relied on compiler-generated default compiler reported problems with MSVC in 2.0.7 - - SVD(const MatrixType& matrix) - : m_matU(matrix.rows(), (std::min)(matrix.rows(), matrix.cols())), - m_matV(matrix.cols(),matrix.cols()), - m_sigma((std::min)(matrix.rows(),matrix.cols())) - { - compute(matrix); - } - - template - bool solve(const MatrixBase &b, ResultType* result) const; - - const MatrixUType& matrixU() const { return m_matU; } - const SingularValuesType& singularValues() const { return m_sigma; } - const MatrixVType& matrixV() const { return m_matV; } - - void compute(const MatrixType& matrix); - SVD& sort(); - - template - void computeUnitaryPositive(UnitaryType *unitary, PositiveType *positive) const; - template - void computePositiveUnitary(PositiveType *positive, UnitaryType *unitary) const; - template - void computeRotationScaling(RotationType *unitary, ScalingType *positive) const; - template - void computeScalingRotation(ScalingType *positive, RotationType *unitary) const; - - protected: - /** \internal */ - MatrixUType m_matU; - /** \internal */ - MatrixVType m_matV; - /** \internal */ - SingularValuesType m_sigma; -}; - -/** Computes / recomputes the SVD decomposition A = U S V^* of \a matrix - * - * \note this code has been adapted from JAMA (public domain) - */ -template -void SVD::compute(const MatrixType& matrix) -{ - const int m = matrix.rows(); - const int n = matrix.cols(); - const int nu = 
(std::min)(m,n); - ei_assert(m>=n && "In Eigen 2.0, SVD only works for MxN matrices with M>=N. Sorry!"); - ei_assert(m>1 && "In Eigen 2.0, SVD doesn't work on 1x1 matrices"); - - m_matU.resize(m, nu); - m_matU.setZero(); - m_sigma.resize((std::min)(m,n)); - m_matV.resize(n,n); - - RowVector e(n); - ColVector work(m); - MatrixType matA(matrix); - const bool wantu = true; - const bool wantv = true; - int i=0, j=0, k=0; - - // Reduce A to bidiagonal form, storing the diagonal elements - // in s and the super-diagonal elements in e. - int nct = (std::min)(m-1,n); - int nrt = (std::max)(0,(std::min)(n-2,m)); - for (k = 0; k < (std::max)(nct,nrt); ++k) - { - if (k < nct) - { - // Compute the transformation for the k-th column and - // place the k-th diagonal in m_sigma[k]. - m_sigma[k] = matA.col(k).end(m-k).norm(); - if (m_sigma[k] != 0.0) // FIXME - { - if (matA(k,k) < 0.0) - m_sigma[k] = -m_sigma[k]; - matA.col(k).end(m-k) /= m_sigma[k]; - matA(k,k) += 1.0; - } - m_sigma[k] = -m_sigma[k]; - } - - for (j = k+1; j < n; ++j) - { - if ((k < nct) && (m_sigma[k] != 0.0)) - { - // Apply the transformation. - Scalar t = matA.col(k).end(m-k).eigen2_dot(matA.col(j).end(m-k)); // FIXME dot product or cwise prod + .sum() ?? - t = -t/matA(k,k); - matA.col(j).end(m-k) += t * matA.col(k).end(m-k); - } - - // Place the k-th row of A into e for the - // subsequent calculation of the row transformation. - e[j] = matA(k,j); - } - - // Place the transformation in U for subsequent back multiplication. - if (wantu & (k < nct)) - m_matU.col(k).end(m-k) = matA.col(k).end(m-k); - - if (k < nrt) - { - // Compute the k-th row transformation and place the - // k-th super-diagonal in e[k]. - e[k] = e.end(n-k-1).norm(); - if (e[k] != 0.0) - { - if (e[k+1] < 0.0) - e[k] = -e[k]; - e.end(n-k-1) /= e[k]; - e[k+1] += 1.0; - } - e[k] = -e[k]; - if ((k+1 < m) & (e[k] != 0.0)) - { - // Apply the transformation. 
- work.end(m-k-1) = matA.corner(BottomRight,m-k-1,n-k-1) * e.end(n-k-1); - for (j = k+1; j < n; ++j) - matA.col(j).end(m-k-1) += (-e[j]/e[k+1]) * work.end(m-k-1); - } - - // Place the transformation in V for subsequent back multiplication. - if (wantv) - m_matV.col(k).end(n-k-1) = e.end(n-k-1); - } - } - - - // Set up the final bidiagonal matrix or order p. - int p = (std::min)(n,m+1); - if (nct < n) - m_sigma[nct] = matA(nct,nct); - if (m < p) - m_sigma[p-1] = 0.0; - if (nrt+1 < p) - e[nrt] = matA(nrt,p-1); - e[p-1] = 0.0; - - // If required, generate U. - if (wantu) - { - for (j = nct; j < nu; ++j) - { - m_matU.col(j).setZero(); - m_matU(j,j) = 1.0; - } - for (k = nct-1; k >= 0; k--) - { - if (m_sigma[k] != 0.0) - { - for (j = k+1; j < nu; ++j) - { - Scalar t = m_matU.col(k).end(m-k).eigen2_dot(m_matU.col(j).end(m-k)); // FIXME is it really a dot product we want ? - t = -t/m_matU(k,k); - m_matU.col(j).end(m-k) += t * m_matU.col(k).end(m-k); - } - m_matU.col(k).end(m-k) = - m_matU.col(k).end(m-k); - m_matU(k,k) = Scalar(1) + m_matU(k,k); - if (k-1>0) - m_matU.col(k).start(k-1).setZero(); - } - else - { - m_matU.col(k).setZero(); - m_matU(k,k) = 1.0; - } - } - } - - // If required, generate V. - if (wantv) - { - for (k = n-1; k >= 0; k--) - { - if ((k < nrt) & (e[k] != 0.0)) - { - for (j = k+1; j < nu; ++j) - { - Scalar t = m_matV.col(k).end(n-k-1).eigen2_dot(m_matV.col(j).end(n-k-1)); // FIXME is it really a dot product we want ? - t = -t/m_matV(k+1,k); - m_matV.col(j).end(n-k-1) += t * m_matV.col(k).end(n-k-1); - } - } - m_matV.col(k).setZero(); - m_matV(k,k) = 1.0; - } - } - - // Main iteration loop for the singular values. - int pp = p-1; - int iter = 0; - Scalar eps = ei_pow(Scalar(2),ei_is_same_type::ret ? Scalar(-23) : Scalar(-52)); - while (p > 0) - { - int k=0; - int kase=0; - - // Here is where a test for too many iterations would go. - - // This section of the program inspects for - // negligible elements in the s and e arrays. 
On - // completion the variables kase and k are set as follows. - - // kase = 1 if s(p) and e[k-1] are negligible and k

= -1; --k) - { - if (k == -1) - break; - if (ei_abs(e[k]) <= eps*(ei_abs(m_sigma[k]) + ei_abs(m_sigma[k+1]))) - { - e[k] = 0.0; - break; - } - } - if (k == p-2) - { - kase = 4; - } - else - { - int ks; - for (ks = p-1; ks >= k; --ks) - { - if (ks == k) - break; - Scalar t = (ks != p ? ei_abs(e[ks]) : Scalar(0)) + (ks != k+1 ? ei_abs(e[ks-1]) : Scalar(0)); - if (ei_abs(m_sigma[ks]) <= eps*t) - { - m_sigma[ks] = 0.0; - break; - } - } - if (ks == k) - { - kase = 3; - } - else if (ks == p-1) - { - kase = 1; - } - else - { - kase = 2; - k = ks; - } - } - ++k; - - // Perform the task indicated by kase. - switch (kase) - { - - // Deflate negligible s(p). - case 1: - { - Scalar f(e[p-2]); - e[p-2] = 0.0; - for (j = p-2; j >= k; --j) - { - Scalar t(numext::hypot(m_sigma[j],f)); - Scalar cs(m_sigma[j]/t); - Scalar sn(f/t); - m_sigma[j] = t; - if (j != k) - { - f = -sn*e[j-1]; - e[j-1] = cs*e[j-1]; - } - if (wantv) - { - for (i = 0; i < n; ++i) - { - t = cs*m_matV(i,j) + sn*m_matV(i,p-1); - m_matV(i,p-1) = -sn*m_matV(i,j) + cs*m_matV(i,p-1); - m_matV(i,j) = t; - } - } - } - } - break; - - // Split at negligible s(k). - case 2: - { - Scalar f(e[k-1]); - e[k-1] = 0.0; - for (j = k; j < p; ++j) - { - Scalar t(numext::hypot(m_sigma[j],f)); - Scalar cs( m_sigma[j]/t); - Scalar sn(f/t); - m_sigma[j] = t; - f = -sn*e[j]; - e[j] = cs*e[j]; - if (wantu) - { - for (i = 0; i < m; ++i) - { - t = cs*m_matU(i,j) + sn*m_matU(i,k-1); - m_matU(i,k-1) = -sn*m_matU(i,j) + cs*m_matU(i,k-1); - m_matU(i,j) = t; - } - } - } - } - break; - - // Perform one qr step. - case 3: - { - // Calculate the shift. 
- Scalar scale = (std::max)((std::max)((std::max)((std::max)( - ei_abs(m_sigma[p-1]),ei_abs(m_sigma[p-2])),ei_abs(e[p-2])), - ei_abs(m_sigma[k])),ei_abs(e[k])); - Scalar sp = m_sigma[p-1]/scale; - Scalar spm1 = m_sigma[p-2]/scale; - Scalar epm1 = e[p-2]/scale; - Scalar sk = m_sigma[k]/scale; - Scalar ek = e[k]/scale; - Scalar b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/Scalar(2); - Scalar c = (sp*epm1)*(sp*epm1); - Scalar shift(0); - if ((b != 0.0) || (c != 0.0)) - { - shift = ei_sqrt(b*b + c); - if (b < 0.0) - shift = -shift; - shift = c/(b + shift); - } - Scalar f = (sk + sp)*(sk - sp) + shift; - Scalar g = sk*ek; - - // Chase zeros. - - for (j = k; j < p-1; ++j) - { - Scalar t = numext::hypot(f,g); - Scalar cs = f/t; - Scalar sn = g/t; - if (j != k) - e[j-1] = t; - f = cs*m_sigma[j] + sn*e[j]; - e[j] = cs*e[j] - sn*m_sigma[j]; - g = sn*m_sigma[j+1]; - m_sigma[j+1] = cs*m_sigma[j+1]; - if (wantv) - { - for (i = 0; i < n; ++i) - { - t = cs*m_matV(i,j) + sn*m_matV(i,j+1); - m_matV(i,j+1) = -sn*m_matV(i,j) + cs*m_matV(i,j+1); - m_matV(i,j) = t; - } - } - t = numext::hypot(f,g); - cs = f/t; - sn = g/t; - m_sigma[j] = t; - f = cs*e[j] + sn*m_sigma[j+1]; - m_sigma[j+1] = -sn*e[j] + cs*m_sigma[j+1]; - g = sn*e[j+1]; - e[j+1] = cs*e[j+1]; - if (wantu && (j < m-1)) - { - for (i = 0; i < m; ++i) - { - t = cs*m_matU(i,j) + sn*m_matU(i,j+1); - m_matU(i,j+1) = -sn*m_matU(i,j) + cs*m_matU(i,j+1); - m_matU(i,j) = t; - } - } - } - e[p-2] = f; - iter = iter + 1; - } - break; - - // Convergence. - case 4: - { - // Make the singular values positive. - if (m_sigma[k] <= 0.0) - { - m_sigma[k] = m_sigma[k] < Scalar(0) ? -m_sigma[k] : Scalar(0); - if (wantv) - m_matV.col(k).start(pp+1) = -m_matV.col(k).start(pp+1); - } - - // Order the singular values. 
- while (k < pp) - { - if (m_sigma[k] >= m_sigma[k+1]) - break; - Scalar t = m_sigma[k]; - m_sigma[k] = m_sigma[k+1]; - m_sigma[k+1] = t; - if (wantv && (k < n-1)) - m_matV.col(k).swap(m_matV.col(k+1)); - if (wantu && (k < m-1)) - m_matU.col(k).swap(m_matU.col(k+1)); - ++k; - } - iter = 0; - p--; - } - break; - } // end big switch - } // end iterations -} - -template -SVD& SVD::sort() -{ - int mu = m_matU.rows(); - int mv = m_matV.rows(); - int n = m_matU.cols(); - - for (int i=0; i p) - { - k = j; - p = m_sigma.coeff(j); - } - } - if (k != i) - { - m_sigma.coeffRef(k) = m_sigma.coeff(i); // i.e. - m_sigma.coeffRef(i) = p; // swaps the i-th and the k-th elements - - int j = mu; - for(int s=0; j!=0; ++s, --j) - std::swap(m_matU.coeffRef(s,i), m_matU.coeffRef(s,k)); - - j = mv; - for (int s=0; j!=0; ++s, --j) - std::swap(m_matV.coeffRef(s,i), m_matV.coeffRef(s,k)); - } - } - return *this; -} - -/** \returns the solution of \f$ A x = b \f$ using the current SVD decomposition of A. - * The parts of the solution corresponding to zero singular values are ignored. - * - * \sa MatrixBase::svd(), LU::solve(), LLT::solve() - */ -template -template -bool SVD::solve(const MatrixBase &b, ResultType* result) const -{ - ei_assert(b.rows() == m_matU.rows()); - - Scalar maxVal = m_sigma.cwise().abs().maxCoeff(); - for (int j=0; j aux = m_matU.transpose() * b.col(j); - - for (int i = 0; i col(j) = m_matV * aux; - } - return true; -} - -/** Computes the polar decomposition of the matrix, as a product unitary x positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * Only for square matrices. 
- * - * \sa computePositiveUnitary(), computeRotationScaling() - */ -template -template -void SVD::computeUnitaryPositive(UnitaryType *unitary, - PositiveType *positive) const -{ - ei_assert(m_matU.cols() == m_matV.cols() && "Polar decomposition is only for square matrices"); - if(unitary) *unitary = m_matU * m_matV.adjoint(); - if(positive) *positive = m_matV * m_sigma.asDiagonal() * m_matV.adjoint(); -} - -/** Computes the polar decomposition of the matrix, as a product positive x unitary. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * Only for square matrices. - * - * \sa computeUnitaryPositive(), computeRotationScaling() - */ -template -template -void SVD::computePositiveUnitary(UnitaryType *positive, - PositiveType *unitary) const -{ - ei_assert(m_matU.rows() == m_matV.rows() && "Polar decomposition is only for square matrices"); - if(unitary) *unitary = m_matU * m_matV.adjoint(); - if(positive) *positive = m_matU * m_sigma.asDiagonal() * m_matU.adjoint(); -} - -/** decomposes the matrix as a product rotation x scaling, the scaling being - * not necessarily positive. - * - * If either pointer is zero, the corresponding computation is skipped. - * - * This method requires the Geometry module. - * - * \sa computeScalingRotation(), computeUnitaryPositive() - */ -template -template -void SVD::computeRotationScaling(RotationType *rotation, ScalingType *scaling) const -{ - ei_assert(m_matU.rows() == m_matV.rows() && "Polar decomposition is only for square matrices"); - Scalar x = (m_matU * m_matV.adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(m_sigma); - sv.coeffRef(0) *= x; - if(scaling) scaling->lazyAssign(m_matV * sv.asDiagonal() * m_matV.adjoint()); - if(rotation) - { - MatrixType m(m_matU); - m.col(0) /= x; - rotation->lazyAssign(m * m_matV.adjoint()); - } -} - -/** decomposes the matrix as a product scaling x rotation, the scaling being - * not necessarily positive. 
- * - * If either pointer is zero, the corresponding computation is skipped. - * - * This method requires the Geometry module. - * - * \sa computeRotationScaling(), computeUnitaryPositive() - */ -template -template -void SVD::computeScalingRotation(ScalingType *scaling, RotationType *rotation) const -{ - ei_assert(m_matU.rows() == m_matV.rows() && "Polar decomposition is only for square matrices"); - Scalar x = (m_matU * m_matV.adjoint()).determinant(); // so x has absolute value 1 - Matrix sv(m_sigma); - sv.coeffRef(0) *= x; - if(scaling) scaling->lazyAssign(m_matU * sv.asDiagonal() * m_matU.adjoint()); - if(rotation) - { - MatrixType m(m_matU); - m.col(0) /= x; - rotation->lazyAssign(m * m_matV.adjoint()); - } -} - - -/** \svd_module - * \returns the SVD decomposition of \c *this - */ -template -inline SVD::PlainObject> -MatrixBase::svd() const -{ - return SVD(derived()); -} - -} // end namespace Eigen - -#endif // EIGEN2_SVD_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/TriangularSolver.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/TriangularSolver.h index ebbeb3b49..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/TriangularSolver.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/TriangularSolver.h @@ -1,42 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_TRIANGULAR_SOLVER2_H -#define EIGEN_TRIANGULAR_SOLVER2_H - -namespace Eigen { - -const unsigned int UnitDiagBit = UnitDiag; -const unsigned int SelfAdjointBit = SelfAdjoint; -const unsigned int UpperTriangularBit = Upper; -const unsigned int LowerTriangularBit = Lower; - -const unsigned int UpperTriangular = Upper; -const unsigned int LowerTriangular = Lower; -const unsigned int UnitUpperTriangular = UnitUpper; -const unsigned int UnitLowerTriangular = UnitLower; - -template -template -typename ExpressionType::PlainObject -Flagged::solveTriangular(const MatrixBase& other) const -{ - return m_matrix.template triangularView().solve(other.derived()); -} - -template -template -void Flagged::solveTriangularInPlace(const MatrixBase& other) const -{ - m_matrix.template triangularView().solveInPlace(other.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_TRIANGULAR_SOLVER2_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/VectorBlock.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/VectorBlock.h index 71a8080a9..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/VectorBlock.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigen2Support/VectorBlock.h @@ -1,94 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN2_VECTORBLOCK_H -#define EIGEN2_VECTORBLOCK_H - -namespace Eigen { - -/** \deprecated use DenseMase::head(Index) */ -template -inline VectorBlock -MatrixBase::start(Index size) -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0, size); -} - -/** \deprecated use DenseMase::head(Index) */ -template -inline const VectorBlock -MatrixBase::start(Index size) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0, size); -} - -/** \deprecated use DenseMase::tail(Index) */ -template -inline VectorBlock -MatrixBase::end(Index size) -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), this->size() - size, size); -} - -/** \deprecated use DenseMase::tail(Index) */ -template -inline const VectorBlock -MatrixBase::end(Index size) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), this->size() - size, size); -} - -/** \deprecated use DenseMase::head() */ -template -template -inline VectorBlock -MatrixBase::start() -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0); -} - -/** \deprecated use DenseMase::head() */ -template -template -inline const VectorBlock -MatrixBase::start() const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), 0); -} - -/** \deprecated use DenseMase::tail() */ -template -template -inline VectorBlock -MatrixBase::end() -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), size() - Size); -} - -/** \deprecated use DenseMase::tail() */ -template -template -inline const VectorBlock -MatrixBase::end() const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return VectorBlock(derived(), size() - Size); -} - -} // end namespace Eigen - -#endif // EIGEN2_VECTORBLOCK_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/CMakeLists.txt index 193e02685..e69de29bb 100644 --- 
a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_EIGENVALUES_SRCS "*.h") - -INSTALL(FILES - ${Eigen_EIGENVALUES_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Eigenvalues COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/ComplexSchur_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/ComplexSchur_MKL.h index 27aed923c..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/ComplexSchur_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/ComplexSchur_MKL.h @@ -1,93 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Complex Schur needed to complex unsymmetrical eigenvalues/eigenvectors. - ******************************************************************************** -*/ - -#ifndef EIGEN_COMPLEX_SCHUR_MKL_H -#define EIGEN_COMPLEX_SCHUR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_SCHUR_COMPLEX(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -ComplexSchur >& \ -ComplexSchur >::compute(const Matrix& matrix, bool computeU) \ -{ \ - typedef Matrix MatrixType; \ - typedef MatrixType::RealScalar RealScalar; \ - typedef std::complex ComplexScalar; \ -\ - eigen_assert(matrix.cols() == matrix.rows()); \ -\ - m_matUisUptodate = false; \ - if(matrix.cols() == 1) \ - { \ - m_matT = matrix.cast(); \ - if(computeU) m_matU = ComplexMatrixType::Identity(1,1); \ - m_info = Success; \ - m_isInitialized = true; \ - m_matUisUptodate = computeU; \ - return *this; \ - } \ - lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ - lapack_int matrix_order = MKLCOLROW; \ - char jobvs, sort='N'; \ - LAPACK_##MKLPREFIX_U##_SELECT1 select = 0; \ - jobvs = (computeU) ? 
'V' : 'N'; \ - m_matU.resize(n, n); \ - lapack_int ldvs = m_matU.outerStride(); \ - m_matT = matrix; \ - Matrix w; \ - w.resize(n, 1);\ - info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)w.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ - if(info == 0) \ - m_info = Success; \ - else \ - m_info = NoConvergence; \ -\ - m_isInitialized = true; \ - m_matUisUptodate = computeU; \ - return *this; \ -\ -} - -EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(dcomplex, MKL_Complex16, z, Z, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_SCHUR_COMPLEX(scomplex, MKL_Complex8, c, C, RowMajor, LAPACK_ROW_MAJOR) - -} // end namespace Eigen - -#endif // EIGEN_COMPLEX_SCHUR_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/RealSchur_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/RealSchur_MKL.h index c3089b468..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/RealSchur_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/RealSchur_MKL.h @@ -1,79 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Real Schur needed to real unsymmetrical eigenvalues/eigenvectors. - ******************************************************************************** -*/ - -#ifndef EIGEN_REAL_SCHUR_MKL_H -#define EIGEN_REAL_SCHUR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_SCHUR_REAL(EIGTYPE, MKLTYPE, MKLPREFIX, MKLPREFIX_U, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -RealSchur >& \ -RealSchur >::compute(const Matrix& matrix, bool computeU) \ -{ \ - eigen_assert(matrix.cols() == matrix.rows()); \ -\ - lapack_int n = matrix.cols(), sdim, info; \ - lapack_int lda = matrix.outerStride(); \ - lapack_int matrix_order = MKLCOLROW; \ - char jobvs, sort='N'; \ - LAPACK_##MKLPREFIX_U##_SELECT2 select = 0; \ - jobvs = (computeU) ? 
'V' : 'N'; \ - m_matU.resize(n, n); \ - lapack_int ldvs = m_matU.outerStride(); \ - m_matT = matrix; \ - Matrix wr, wi; \ - wr.resize(n, 1); wi.resize(n, 1); \ - info = LAPACKE_##MKLPREFIX##gees( matrix_order, jobvs, sort, select, n, (MKLTYPE*)m_matT.data(), lda, &sdim, (MKLTYPE*)wr.data(), (MKLTYPE*)wi.data(), (MKLTYPE*)m_matU.data(), ldvs ); \ - if(info == 0) \ - m_info = Success; \ - else \ - m_info = NoConvergence; \ -\ - m_isInitialized = true; \ - m_matUisUptodate = computeU; \ - return *this; \ -\ -} - -EIGEN_MKL_SCHUR_REAL(double, double, d, D, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_REAL(float, float, s, S, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_SCHUR_REAL(double, double, d, D, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_SCHUR_REAL(float, float, s, S, RowMajor, LAPACK_ROW_MAJOR) - -} // end namespace Eigen - -#endif // EIGEN_REAL_SCHUR_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h index 17c0dadd2..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_MKL.h @@ -1,92 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Self-adjoint eigenvalues/eigenvectors. - ******************************************************************************** -*/ - -#ifndef EIGEN_SAEIGENSOLVER_MKL_H -#define EIGEN_SAEIGENSOLVER_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_EIG_SELFADJ(EIGTYPE, MKLTYPE, MKLRTYPE, MKLNAME, EIGCOLROW, MKLCOLROW ) \ -template<> inline \ -SelfAdjointEigenSolver >& \ -SelfAdjointEigenSolver >::compute(const Matrix& matrix, int options) \ -{ \ - eigen_assert(matrix.cols() == matrix.rows()); \ - eigen_assert((options&~(EigVecMask|GenEigMask))==0 \ - && (options&EigVecMask)!=EigVecMask \ - && "invalid option parameter"); \ - bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ - lapack_int n = matrix.cols(), lda, matrix_order, info; \ - m_eivalues.resize(n,1); \ - m_subdiag.resize(n-1); \ - m_eivec = matrix; \ -\ - if(n==1) \ - { \ - m_eivalues.coeffRef(0,0) = numext::real(matrix.coeff(0,0)); \ - if(computeEigenvectors) 
m_eivec.setOnes(n,n); \ - m_info = Success; \ - m_isInitialized = true; \ - m_eigenvectorsOk = computeEigenvectors; \ - return *this; \ - } \ -\ - lda = matrix.outerStride(); \ - matrix_order=MKLCOLROW; \ - char jobz, uplo='L'/*, range='A'*/; \ - jobz = computeEigenvectors ? 'V' : 'N'; \ -\ - info = LAPACKE_##MKLNAME( matrix_order, jobz, uplo, n, (MKLTYPE*)m_eivec.data(), lda, (MKLRTYPE*)m_eivalues.data() ); \ - m_info = (info==0) ? Success : NoConvergence; \ - m_isInitialized = true; \ - m_eigenvectorsOk = computeEigenvectors; \ - return *this; \ -} - - -EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, ColMajor, LAPACK_COL_MAJOR) - -EIGEN_MKL_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(dcomplex, MKL_Complex16, double, zheev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_MKL_EIG_SELFADJ(scomplex, MKL_Complex8, float, cheev, RowMajor, LAPACK_ROW_MAJOR) - -} // end namespace Eigen - -#endif // EIGEN_SAEIGENSOLVER_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Geometry/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Geometry/CMakeLists.txt index f8f728b84..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Geometry/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Geometry/CMakeLists.txt @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_Geometry_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Geometry_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry COMPONENT Devel - ) - -ADD_SUBDIRECTORY(arch) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Geometry/arch/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Geometry/arch/CMakeLists.txt index 1267a79c7..e69de29bb 100644 --- 
a/thirdparty/eigen-3.2.10/Eigen/src/Geometry/arch/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Geometry/arch/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Geometry_arch_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Geometry_arch_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Geometry/arch COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Householder/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Householder/CMakeLists.txt index ce4937db0..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Householder/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Householder/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Householder_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Householder_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Householder COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 1f3c060d0..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -1,149 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BASIC_PRECONDITIONERS_H -#define EIGEN_BASIC_PRECONDITIONERS_H - -namespace Eigen { - -/** \ingroup IterativeLinearSolvers_Module - * \brief A preconditioner based on the digonal entries - * - * This class allows to approximately solve for A.x = b problems assuming A is a diagonal matrix. 
- * In other words, this preconditioner neglects all off diagonal entries and, in Eigen's language, solves for: - * \code - * A.diagonal().asDiagonal() . x = b - * \endcode - * - * \tparam _Scalar the type of the scalar. - * - * This preconditioner is suitable for both selfadjoint and general problems. - * The diagonal entries are pre-inverted and stored into a dense vector. - * - * \note A variant that has yet to be implemented would attempt to preserve the norm of each column. - * - */ -template -class DiagonalPreconditioner -{ - typedef _Scalar Scalar; - typedef Matrix Vector; - typedef typename Vector::Index Index; - - public: - // this typedef is only to export the scalar type and compile-time dimensions to solve_retval - typedef Matrix MatrixType; - - DiagonalPreconditioner() : m_isInitialized(false) {} - - template - DiagonalPreconditioner(const MatType& mat) : m_invdiag(mat.cols()) - { - compute(mat); - } - - Index rows() const { return m_invdiag.size(); } - Index cols() const { return m_invdiag.size(); } - - template - DiagonalPreconditioner& analyzePattern(const MatType& ) - { - return *this; - } - - template - DiagonalPreconditioner& factorize(const MatType& mat) - { - m_invdiag.resize(mat.cols()); - for(int j=0; j - DiagonalPreconditioner& compute(const MatType& mat) - { - return factorize(mat); - } - - template - void _solve(const Rhs& b, Dest& x) const - { - x = m_invdiag.array() * b.array() ; - } - - template inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "DiagonalPreconditioner is not initialized."); - eigen_assert(m_invdiag.size()==b.rows() - && "DiagonalPreconditioner::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - protected: - Vector m_invdiag; - bool m_isInitialized; -}; - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef 
DiagonalPreconditioner<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} - -/** \ingroup IterativeLinearSolvers_Module - * \brief A naive preconditioner which approximates any matrix as the identity matrix - * - * \sa class DiagonalPreconditioner - */ -class IdentityPreconditioner -{ - public: - - IdentityPreconditioner() {} - - template - IdentityPreconditioner(const MatrixType& ) {} - - template - IdentityPreconditioner& analyzePattern(const MatrixType& ) { return *this; } - - template - IdentityPreconditioner& factorize(const MatrixType& ) { return *this; } - - template - IdentityPreconditioner& compute(const MatrixType& ) { return *this; } - - template - inline const Rhs& solve(const Rhs& b) const { return b; } -}; - -} // end namespace Eigen - -#endif // EIGEN_BASIC_PRECONDITIONERS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 551221907..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -1,263 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_BICGSTAB_H -#define EIGEN_BICGSTAB_H - -namespace Eigen { - -namespace internal { - -/** \internal Low-level bi conjugate gradient stabilized algorithm - * \param mat The matrix A - * \param rhs The right hand side vector b - * \param x On input and initial solution, on output the computed solution. 
- * \param precond A preconditioner being able to efficiently solve for an - * approximation of Ax=b (regardless of b) - * \param iters On input the max number of iteration, on output the number of performed iterations. - * \param tol_error On input the tolerance error, on output an estimation of the relative error. - * \return false in the case of numerical issue, for example a break down of BiCGSTAB. - */ -template -bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, - const Preconditioner& precond, int& iters, - typename Dest::RealScalar& tol_error) -{ - using std::sqrt; - using std::abs; - typedef typename Dest::RealScalar RealScalar; - typedef typename Dest::Scalar Scalar; - typedef Matrix VectorType; - RealScalar tol = tol_error; - int maxIters = iters; - - int n = mat.cols(); - VectorType r = rhs - mat * x; - VectorType r0 = r; - - RealScalar r0_sqnorm = r0.squaredNorm(); - RealScalar rhs_sqnorm = rhs.squaredNorm(); - if(rhs_sqnorm == 0) - { - x.setZero(); - return true; - } - Scalar rho = 1; - Scalar alpha = 1; - Scalar w = 1; - - VectorType v = VectorType::Zero(n), p = VectorType::Zero(n); - VectorType y(n), z(n); - VectorType kt(n), ks(n); - - VectorType s(n), t(n); - - RealScalar tol2 = tol*tol; - RealScalar eps2 = NumTraits::epsilon()*NumTraits::epsilon(); - int i = 0; - int restarts = 0; - - while ( r.squaredNorm()/rhs_sqnorm > tol2 && iRealScalar(0)) - w = t.dot(s) / tmp; - else - w = Scalar(0); - x += alpha * y + w * z; - r = s - w * t; - ++i; - } - tol_error = sqrt(r.squaredNorm()/rhs_sqnorm); - iters = i; - return true; -} - -} - -template< typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner > -class BiCGSTAB; - -namespace internal { - -template< typename _MatrixType, typename _Preconditioner> -struct traits > -{ - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; -}; - -} - -/** \ingroup IterativeLinearSolvers_Module - * \brief A bi conjugate gradient stabilized solver for sparse square 
problems - * - * This class allows to solve for A.x = b sparse linear problems using a bi conjugate gradient - * stabilized algorithm. The vectors x and b can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner - * - * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() - * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations - * and NumTraits::epsilon() for the tolerance. - * - * This class can be used as the direct solver classes. Here is a typical usage example: - * \code - * int n = 10000; - * VectorXd x(n), b(n); - * SparseMatrix A(n,n); - * // fill A and b - * BiCGSTAB > solver; - * solver.compute(A); - * x = solver.solve(b); - * std::cout << "#iterations: " << solver.iterations() << std::endl; - * std::cout << "estimated error: " << solver.error() << std::endl; - * // update b, and solve again - * x = solver.solve(b); - * \endcode - * - * By default the iterations start with x=0 as an initial guess of the solution. - * One can control the start using the solveWithGuess() method. - * - * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner - */ -template< typename _MatrixType, typename _Preconditioner> -class BiCGSTAB : public IterativeSolverBase > -{ - typedef IterativeSolverBase Base; - using Base::mp_matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; -public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - -public: - - /** Default constructor. 
*/ - BiCGSTAB() : Base() {} - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - * - * This constructor is a shortcut for the default constructor followed - * by a call to compute(). - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template - explicit BiCGSTAB(const EigenBase& A) : Base(A.derived()) {} - - ~BiCGSTAB() {} - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A - * \a x0 as an initial solution. - * - * \sa compute() - */ - template - inline const internal::solve_retval_with_guess - solveWithGuess(const MatrixBase& b, const Guess& x0) const - { - eigen_assert(m_isInitialized && "BiCGSTAB is not initialized."); - eigen_assert(Base::rows()==b.rows() - && "BiCGSTAB::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval_with_guess - (*this, b.derived(), x0); - } - - /** \internal */ - template - void _solveWithGuess(const Rhs& b, Dest& x) const - { - bool failed = false; - for(int j=0; j - void _solve(const Rhs& b, Dest& x) const - { -// x.setZero(); - x = b; - _solveWithGuess(b,x); - } - -protected: - -}; - - -namespace internal { - - template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef BiCGSTAB<_MatrixType, _Preconditioner> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_BICGSTAB_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/CMakeLists.txt index 59ccc0072..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/CMakeLists.txt +++ 
b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_IterativeLinearSolvers_SRCS "*.h") - -INSTALL(FILES - ${Eigen_IterativeLinearSolvers_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/IterativeLinearSolvers COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h index 7dd4010c3..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h @@ -1,258 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CONJUGATE_GRADIENT_H -#define EIGEN_CONJUGATE_GRADIENT_H - -namespace Eigen { - -namespace internal { - -/** \internal Low-level conjugate gradient algorithm - * \param mat The matrix A - * \param rhs The right hand side vector b - * \param x On input and initial solution, on output the computed solution. - * \param precond A preconditioner being able to efficiently solve for an - * approximation of Ax=b (regardless of b) - * \param iters On input the max number of iteration, on output the number of performed iterations. - * \param tol_error On input the tolerance error, on output an estimation of the relative error. 
- */ -template -EIGEN_DONT_INLINE -void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x, - const Preconditioner& precond, int& iters, - typename Dest::RealScalar& tol_error) -{ - using std::sqrt; - using std::abs; - typedef typename Dest::RealScalar RealScalar; - typedef typename Dest::Scalar Scalar; - typedef Matrix VectorType; - - RealScalar tol = tol_error; - int maxIters = iters; - - int n = mat.cols(); - - VectorType residual = rhs - mat * x; //initial residual - - RealScalar rhsNorm2 = rhs.squaredNorm(); - if(rhsNorm2 == 0) - { - x.setZero(); - iters = 0; - tol_error = 0; - return; - } - RealScalar threshold = tol*tol*rhsNorm2; - RealScalar residualNorm2 = residual.squaredNorm(); - if (residualNorm2 < threshold) - { - iters = 0; - tol_error = sqrt(residualNorm2 / rhsNorm2); - return; - } - - VectorType p(n); - p = precond.solve(residual); //initial search direction - - VectorType z(n), tmp(n); - RealScalar absNew = numext::real(residual.dot(p)); // the square of the absolute value of r scaled by invM - int i = 0; - while(i < maxIters) - { - tmp.noalias() = mat * p; // the bottleneck of the algorithm - - Scalar alpha = absNew / p.dot(tmp); // the amount we travel on dir - x += alpha * p; // update solution - residual -= alpha * tmp; // update residue - - residualNorm2 = residual.squaredNorm(); - if(residualNorm2 < threshold) - break; - - z = precond.solve(residual); // approximately solve for "A z = residual" - - RealScalar absOld = absNew; - absNew = numext::real(residual.dot(z)); // update the absolute value of r - RealScalar beta = absNew / absOld; // calculate the Gram-Schmidt value used to create the new search direction - p = z + beta * p; // update search direction - i++; - } - tol_error = sqrt(residualNorm2 / rhsNorm2); - iters = i; -} - -} - -template< typename _MatrixType, int _UpLo=Lower, - typename _Preconditioner = DiagonalPreconditioner > -class ConjugateGradient; - -namespace internal { - -template< typename _MatrixType, int 
_UpLo, typename _Preconditioner> -struct traits > -{ - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; -}; - -} - -/** \ingroup IterativeLinearSolvers_Module - * \brief A conjugate gradient solver for sparse self-adjoint problems - * - * This class allows to solve for A.x = b sparse linear problems using a conjugate gradient algorithm. - * The sparse matrix A must be selfadjoint. The vectors x and b can be either dense or sparse. - * - * \tparam _MatrixType the type of the matrix A, can be a dense or a sparse matrix. - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower, - * Upper, or Lower|Upper in which the full matrix entries will be considered. Default is Lower. - * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner - * - * The maximal number of iterations and tolerance value can be controlled via the setMaxIterations() - * and setTolerance() methods. The defaults are the size of the problem for the maximal number of iterations - * and NumTraits::epsilon() for the tolerance. - * - * This class can be used as the direct solver classes. Here is a typical usage example: - * \code - * int n = 10000; - * VectorXd x(n), b(n); - * SparseMatrix A(n,n); - * // fill A and b - * ConjugateGradient > cg; - * cg.compute(A); - * x = cg.solve(b); - * std::cout << "#iterations: " << cg.iterations() << std::endl; - * std::cout << "estimated error: " << cg.error() << std::endl; - * // update b, and solve again - * x = cg.solve(b); - * \endcode - * - * By default the iterations start with x=0 as an initial guess of the solution. - * One can control the start using the solveWithGuess() method. - * - * ConjugateGradient can also be used in a matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. 
- * - * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner - */ -template< typename _MatrixType, int _UpLo, typename _Preconditioner> -class ConjugateGradient : public IterativeSolverBase > -{ - typedef IterativeSolverBase Base; - using Base::mp_matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; -public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - enum { - UpLo = _UpLo - }; - -public: - - /** Default constructor. */ - ConjugateGradient() : Base() {} - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - * - * This constructor is a shortcut for the default constructor followed - * by a call to compute(). - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template - explicit ConjugateGradient(const EigenBase& A) : Base(A.derived()) {} - - ~ConjugateGradient() {} - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A - * \a x0 as an initial solution. 
- * - * \sa compute() - */ - template - inline const internal::solve_retval_with_guess - solveWithGuess(const MatrixBase& b, const Guess& x0) const - { - eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); - eigen_assert(Base::rows()==b.rows() - && "ConjugateGradient::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval_with_guess - (*this, b.derived(), x0); - } - - /** \internal */ - template - void _solveWithGuess(const Rhs& b, Dest& x) const - { - typedef typename internal::conditional - >::type MatrixWrapperType; - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - for(int j=0; j - void _solve(const Rhs& b, Dest& x) const - { - x.setZero(); - _solveWithGuess(b,x); - } - -protected: - -}; - - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef ConjugateGradient<_MatrixType,_UpLo,_Preconditioner> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CONJUGATE_GRADIENT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index d3f37fea2..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -1,478 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_INCOMPLETE_LUT_H -#define EIGEN_INCOMPLETE_LUT_H - - -namespace Eigen { - -namespace internal { - -/** \internal - * Compute a quick-sort split of a vector - * On output, the vector row is permuted such that its elements satisfy - * abs(row(i)) >= abs(row(ncut)) if incut - * \param row The vector of values - * \param ind The array of index for the elements in @p row - * \param ncut The number of largest elements to keep - **/ -template -Index QuickSplit(VectorV &row, VectorI &ind, Index ncut) -{ - typedef typename VectorV::RealScalar RealScalar; - using std::swap; - using std::abs; - Index mid; - Index n = row.size(); /* length of the vector */ - Index first, last ; - - ncut--; /* to fit the zero-based indices */ - first = 0; - last = n-1; - if (ncut < first || ncut > last ) return 0; - - do { - mid = first; - RealScalar abskey = abs(row(mid)); - for (Index j = first + 1; j <= last; j++) { - if ( abs(row(j)) > abskey) { - ++mid; - swap(row(mid), row(j)); - swap(ind(mid), ind(j)); - } - } - /* Interchange for the pivot element */ - swap(row(mid), row(first)); - swap(ind(mid), ind(first)); - - if (mid > ncut) last = mid - 1; - else if (mid < ncut ) first = mid + 1; - } while (mid != ncut ); - - return 0; /* mid is equal to ncut */ -} - -}// end namespace internal - -/** \ingroup IterativeLinearSolvers_Module - * \class IncompleteLUT - * \brief Incomplete LU factorization with dual-threshold strategy - * - * During the numerical factorization, two dropping rules are used : - * 1) any element whose magnitude is less than some tolerance is dropped. - * This tolerance is obtained by multiplying the input tolerance @p droptol - * by the average magnitude of all the original elements in the current row. - * 2) After the elimination of the row, only the @p fill largest elements in - * the L part and the @p fill largest elements in the U part are kept - * (in addition to the diagonal element ). 
Note that @p fill is computed from - * the input parameter @p fillfactor which is used the ratio to control the fill_in - * relatively to the initial number of nonzero elements. - * - * The two extreme cases are when @p droptol=0 (to keep all the @p fill*2 largest elements) - * and when @p fill=n/2 with @p droptol being different to zero. - * - * References : Yousef Saad, ILUT: A dual threshold incomplete LU factorization, - * Numerical Linear Algebra with Applications, 1(4), pp 387-402, 1994. - * - * NOTE : The following implementation is derived from the ILUT implementation - * in the SPARSKIT package, Copyright (C) 2005, the Regents of the University of Minnesota - * released under the terms of the GNU LGPL: - * http://www-users.cs.umn.edu/~saad/software/SPARSKIT/README - * However, Yousef Saad gave us permission to relicense his ILUT code to MPL2. - * See the Eigen mailing list archive, thread: ILUT, date: July 8, 2012: - * http://listengine.tuxfamily.org/lists.tuxfamily.org/eigen/2012/07/msg00064.html - * alternatively, on GMANE: - * http://comments.gmane.org/gmane.comp.lib.eigen/3302 - */ -template -class IncompleteLUT : internal::noncopyable -{ - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix Vector; - typedef SparseMatrix FactorType; - typedef SparseMatrix PermutType; - typedef typename FactorType::Index Index; - - public: - typedef Matrix MatrixType; - - IncompleteLUT() - : m_droptol(NumTraits::dummy_precision()), m_fillfactor(10), - m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false) - {} - - template - IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits::dummy_precision(), int fillfactor = 10) - : m_droptol(droptol),m_fillfactor(fillfactor), - m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false) - { - eigen_assert(fillfactor != 0); - compute(mat); - } - - Index rows() const { return m_lu.rows(); } - - Index cols() const { return m_lu.cols(); } - - /** 
\brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. - */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "IncompleteLUT is not initialized."); - return m_info; - } - - template - void analyzePattern(const MatrixType& amat); - - template - void factorize(const MatrixType& amat); - - /** - * Compute an incomplete LU factorization with dual threshold on the matrix mat - * No pivoting is done in this version - * - **/ - template - IncompleteLUT& compute(const MatrixType& amat) - { - analyzePattern(amat); - factorize(amat); - return *this; - } - - void setDroptol(const RealScalar& droptol); - void setFillfactor(int fillfactor); - - template - void _solve(const Rhs& b, Dest& x) const - { - x = m_Pinv * b; - x = m_lu.template triangularView().solve(x); - x = m_lu.template triangularView().solve(x); - x = m_P * x; - } - - template inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "IncompleteLUT is not initialized."); - eigen_assert(cols()==b.rows() - && "IncompleteLUT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - -protected: - - /** keeps off-diagonal entries; drops diagonal entries */ - struct keep_diag { - inline bool operator() (const Index& row, const Index& col, const Scalar&) const - { - return row!=col; - } - }; - -protected: - - FactorType m_lu; - RealScalar m_droptol; - int m_fillfactor; - bool m_analysisIsOk; - bool m_factorizationIsOk; - bool m_isInitialized; - ComputationInfo m_info; - PermutationMatrix m_P; // Fill-reducing permutation - PermutationMatrix m_Pinv; // Inverse permutation -}; - -/** - * Set control parameter droptol - * \param droptol Drop any element whose magnitude is less than this tolerance - **/ -template -void IncompleteLUT::setDroptol(const RealScalar& 
droptol) -{ - this->m_droptol = droptol; -} - -/** - * Set control parameter fillfactor - * \param fillfactor This is used to compute the number @p fill_in of largest elements to keep on each row. - **/ -template -void IncompleteLUT::setFillfactor(int fillfactor) -{ - this->m_fillfactor = fillfactor; -} - -template -template -void IncompleteLUT::analyzePattern(const _MatrixType& amat) -{ - // Compute the Fill-reducing permutation - // Since ILUT does not perform any numerical pivoting, - // it is highly preferable to keep the diagonal through symmetric permutations. -#ifndef EIGEN_MPL2_ONLY - // To this end, let's symmetrize the pattern and perform AMD on it. - SparseMatrix mat1 = amat; - SparseMatrix mat2 = amat.transpose(); - // FIXME for a matrix with nearly symmetric pattern, mat2+mat1 is the appropriate choice. - // on the other hand for a really non-symmetric pattern, mat2*mat1 should be prefered... - SparseMatrix AtA = mat2 + mat1; - AMDOrdering ordering; - ordering(AtA,m_P); - m_Pinv = m_P.inverse(); // cache the inverse permutation -#else - // If AMD is not available, (MPL2-only), then let's use the slower COLAMD routine. 
- SparseMatrix mat1 = amat; - COLAMDOrdering ordering; - ordering(mat1,m_Pinv); - m_P = m_Pinv.inverse(); -#endif - - m_analysisIsOk = true; - m_factorizationIsOk = false; - m_isInitialized = false; -} - -template -template -void IncompleteLUT::factorize(const _MatrixType& amat) -{ - using std::sqrt; - using std::swap; - using std::abs; - - eigen_assert((amat.rows() == amat.cols()) && "The factorization should be done on a square matrix"); - Index n = amat.cols(); // Size of the matrix - m_lu.resize(n,n); - // Declare Working vectors and variables - Vector u(n) ; // real values of the row -- maximum size is n -- - VectorXi ju(n); // column position of the values in u -- maximum size is n - VectorXi jr(n); // Indicate the position of the nonzero elements in the vector u -- A zero location is indicated by -1 - - // Apply the fill-reducing permutation - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - SparseMatrix mat; - mat = amat.twistedBy(m_Pinv); - - // Initialization - jr.fill(-1); - ju.fill(0); - u.fill(0); - - // number of largest elements to keep in each row: - Index fill_in = static_cast (amat.nonZeros()*m_fillfactor)/n+1; - if (fill_in > n) fill_in = n; - - // number of largest nonzero elements to keep in the L and the U part of the current row: - Index nnzL = fill_in/2; - Index nnzU = nnzL; - m_lu.reserve(n * (nnzL + nnzU + 1)); - - // global loop over the rows of the sparse matrix - for (Index ii = 0; ii < n; ii++) - { - // 1 - copy the lower and the upper part of the row i of mat in the working vector u - - Index sizeu = 1; // number of nonzero elements in the upper part of the current row - Index sizel = 0; // number of nonzero elements in the lower part of the current row - ju(ii) = ii; - u(ii) = 0; - jr(ii) = ii; - RealScalar rownorm = 0; - - typename FactorType::InnerIterator j_it(mat, ii); // Iterate through the current row ii - for (; j_it; ++j_it) - { - Index k = j_it.index(); - if (k < ii) - { - // copy the lower part - 
ju(sizel) = k; - u(sizel) = j_it.value(); - jr(k) = sizel; - ++sizel; - } - else if (k == ii) - { - u(ii) = j_it.value(); - } - else - { - // copy the upper part - Index jpos = ii + sizeu; - ju(jpos) = k; - u(jpos) = j_it.value(); - jr(k) = jpos; - ++sizeu; - } - rownorm += numext::abs2(j_it.value()); - } - - // 2 - detect possible zero row - if(rownorm==0) - { - m_info = NumericalIssue; - return; - } - // Take the 2-norm of the current row as a relative tolerance - rownorm = sqrt(rownorm); - - // 3 - eliminate the previous nonzero rows - Index jj = 0; - Index len = 0; - while (jj < sizel) - { - // In order to eliminate in the correct order, - // we must select first the smallest column index among ju(jj:sizel) - Index k; - Index minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment - k += jj; - if (minrow != ju(jj)) - { - // swap the two locations - Index j = ju(jj); - swap(ju(jj), ju(k)); - jr(minrow) = jj; jr(j) = k; - swap(u(jj), u(k)); - } - // Reset this location - jr(minrow) = -1; - - // Start elimination - typename FactorType::InnerIterator ki_it(m_lu, minrow); - while (ki_it && ki_it.index() < minrow) ++ki_it; - eigen_internal_assert(ki_it && ki_it.col()==minrow); - Scalar fact = u(jj) / ki_it.value(); - - // drop too small elements - if(abs(fact) <= m_droptol) - { - jj++; - continue; - } - - // linear combination of the current row ii and the row minrow - ++ki_it; - for (; ki_it; ++ki_it) - { - Scalar prod = fact * ki_it.value(); - Index j = ki_it.index(); - Index jpos = jr(j); - if (jpos == -1) // fill-in element - { - Index newpos; - if (j >= ii) // dealing with the upper part - { - newpos = ii + sizeu; - sizeu++; - eigen_internal_assert(sizeu<=n); - } - else // dealing with the lower part - { - newpos = sizel; - sizel++; - eigen_internal_assert(sizel<=ii); - } - ju(newpos) = j; - u(newpos) = -prod; - jr(j) = newpos; - } - else - u(jpos) -= prod; - } - // store the pivot element - u(len) = fact; - ju(len) = minrow; - ++len; - - 
jj++; - } // end of the elimination on the row ii - - // reset the upper part of the pointer jr to zero - for(Index k = 0; k m_droptol * rownorm ) - { - ++len; - u(ii + len) = u(ii + k); - ju(ii + len) = ju(ii + k); - } - } - sizeu = len + 1; // +1 to take into account the diagonal element - len = (std::min)(sizeu, nnzU); - typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1)); - typename VectorXi::SegmentReturnType juu(ju.segment(ii+1, sizeu-1)); - internal::QuickSplit(uu, juu, len); - - // store the largest elements of the U part - for(Index k = ii + 1; k < ii + len; k++) - m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k); - } - - m_lu.finalize(); - m_lu.makeCompressed(); - - m_factorizationIsOk = true; - m_isInitialized = m_factorizationIsOk; - m_info = Success; -} - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef IncompleteLUT<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_INCOMPLETE_LUT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h index 501ef2f8d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h @@ -1,282 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_ITERATIVE_SOLVER_BASE_H -#define EIGEN_ITERATIVE_SOLVER_BASE_H - -namespace Eigen { - -/** \ingroup IterativeLinearSolvers_Module - * \brief Base class for linear iterative solvers - * - * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner - */ -template< typename Derived> -class IterativeSolverBase : internal::noncopyable -{ -public: - typedef typename internal::traits::MatrixType MatrixType; - typedef typename internal::traits::Preconditioner Preconditioner; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - typedef typename MatrixType::RealScalar RealScalar; - -public: - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } - - /** Default constructor. */ - IterativeSolverBase() - : mp_matrix(0) - { - init(); - } - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - * - * This constructor is a shortcut for the default constructor followed - * by a call to compute(). - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template - IterativeSolverBase(const EigenBase& A) - { - init(); - compute(A.derived()); - } - - ~IterativeSolverBase() {} - - /** Initializes the iterative solver for the sparcity pattern of the matrix \a A for further solving \c Ax=b problems. - * - * Currently, this function mostly call analyzePattern on the preconditioner. In the future - * we might, for instance, implement column reodering for faster matrix vector products. 
- */ - template - Derived& analyzePattern(const EigenBase& A) - { - grabInput(A.derived()); - m_preconditioner.analyzePattern(*mp_matrix); - m_isInitialized = true; - m_analysisIsOk = true; - m_info = Success; - return derived(); - } - - /** Initializes the iterative solver with the numerical values of the matrix \a A for further solving \c Ax=b problems. - * - * Currently, this function mostly call factorize on the preconditioner. - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template - Derived& factorize(const EigenBase& A) - { - grabInput(A.derived()); - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - m_preconditioner.factorize(*mp_matrix); - m_factorizationIsOk = true; - m_info = Success; - return derived(); - } - - /** Initializes the iterative solver with the matrix \a A for further solving \c Ax=b problems. - * - * Currently, this function mostly initialized/compute the preconditioner. In the future - * we might, for instance, implement column reodering for faster matrix vector products. - * - * \warning this class stores a reference to the matrix A as well as some - * precomputed values that depend on it. Therefore, if \a A is changed - * this class becomes invalid. Call compute() to update it with the new - * matrix A, or modify a copy of A. - */ - template - Derived& compute(const EigenBase& A) - { - grabInput(A.derived()); - m_preconditioner.compute(*mp_matrix); - m_isInitialized = true; - m_analysisIsOk = true; - m_factorizationIsOk = true; - m_info = Success; - return derived(); - } - - /** \internal */ - Index rows() const { return mp_matrix ? mp_matrix->rows() : 0; } - /** \internal */ - Index cols() const { return mp_matrix ? 
mp_matrix->cols() : 0; } - - /** \returns the tolerance threshold used by the stopping criteria */ - RealScalar tolerance() const { return m_tolerance; } - - /** Sets the tolerance threshold used by the stopping criteria */ - Derived& setTolerance(const RealScalar& tolerance) - { - m_tolerance = tolerance; - return derived(); - } - - /** \returns a read-write reference to the preconditioner for custom configuration. */ - Preconditioner& preconditioner() { return m_preconditioner; } - - /** \returns a read-only reference to the preconditioner. */ - const Preconditioner& preconditioner() const { return m_preconditioner; } - - /** \returns the max number of iterations */ - int maxIterations() const - { - return (mp_matrix && m_maxIterations<0) ? mp_matrix->cols() : m_maxIterations; - } - - /** Sets the max number of iterations */ - Derived& setMaxIterations(int maxIters) - { - m_maxIterations = maxIters; - return derived(); - } - - /** \returns the number of iterations performed during the last solve */ - int iterations() const - { - eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); - return m_iterations; - } - - /** \returns the tolerance error reached during the last solve */ - RealScalar error() const - { - eigen_assert(m_isInitialized && "ConjugateGradient is not initialized."); - return m_error; - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); - eigen_assert(rows()==b.rows() - && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(derived(), b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); - eigen_assert(rows()==b.rows() - && "IterativeSolverBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** \returns Success if the iterations converged, and NoConvergence otherwise. */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "IterativeSolverBase is not initialized."); - return m_info; - } - - /** \internal */ - template - void _solve_sparse(const Rhs& b, SparseMatrix &dest) const - { - eigen_assert(rows()==b.rows()); - - int rhsCols = b.cols(); - int size = b.rows(); - Eigen::Matrix tb(size); - Eigen::Matrix tx(size); - for(int k=0; k - void grabInput(const EigenBase& A) - { - // we const cast to prevent the creation of a MatrixType temporary by the compiler. - grabInput_impl(A.const_cast_derived()); - } - - template - void grabInput_impl(const EigenBase& A) - { - m_copyMatrix = A; - mp_matrix = &m_copyMatrix; - } - - void grabInput_impl(MatrixType& A) - { - if(MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==Dynamic) - m_copyMatrix.resize(0,0); - mp_matrix = &A; - } - - void init() - { - m_isInitialized = false; - m_analysisIsOk = false; - m_factorizationIsOk = false; - m_maxIterations = -1; - m_tolerance = NumTraits::epsilon(); - } - MatrixType m_copyMatrix; - const MatrixType* mp_matrix; - Preconditioner m_preconditioner; - - int m_maxIterations; - RealScalar m_tolerance; - - mutable RealScalar m_error; - mutable int m_iterations; - mutable ComputationInfo m_info; - mutable bool m_isInitialized, m_analysisIsOk, m_factorizationIsOk; -}; - -namespace internal { - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef IterativeSolverBase Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) 
- - template void evalTo(Dest& dst) const - { - dec().derived()._solve_sparse(rhs(),dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ITERATIVE_SOLVER_BASE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/Jacobi/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/Jacobi/CMakeLists.txt index 490dac626..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/Jacobi/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/Jacobi/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Jacobi_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Jacobi_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Jacobi COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/LU/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/LU/CMakeLists.txt index e0d8d78c1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/LU/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/LU/CMakeLists.txt @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_LU_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel - ) - -ADD_SUBDIRECTORY(arch) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/LU/PartialPivLU_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/LU/PartialPivLU_MKL.h index 9035953c8..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/LU/PartialPivLU_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/LU/PartialPivLU_MKL.h @@ -1,85 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * LU decomposition with partial pivoting based on LAPACKE_?getrf function. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_PARTIALLU_LAPACK_H -#define EIGEN_PARTIALLU_LAPACK_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -namespace internal { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template \ -struct partial_lu_impl \ -{ \ - /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ - { \ - EIGEN_UNUSED_VARIABLE(maxBlockSize);\ - lapack_int matrix_order, first_zero_pivot; \ - lapack_int m, n, lda, *ipiv, info; \ - EIGTYPE* a; \ -/* Set up parameters for ?getrf */ \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = luStride; \ - a = lu_data; \ - ipiv = row_transpositions; \ - m = rows; \ - n = cols; \ - nb_transpositions = 0; \ -\ - info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ -\ - for(int i=0;i= 0); \ -/* something should be done with nb_transpositions */ \ -\ - first_zero_pivot = info; \ - return first_zero_pivot; \ - } \ -}; - -EIGEN_MKL_LU_PARTPIV(double, double, d) -EIGEN_MKL_LU_PARTPIV(float, float, s) -EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/LU/arch/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/LU/arch/CMakeLists.txt index f6b7ed9ec..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/LU/arch/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/LU/arch/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_LU_arch_SRCS "*.h") - -INSTALL(FILES - 
${Eigen_LU_arch_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/CMakeLists.txt index 2bad31416..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_MetisSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_MetisSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/MetisSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/MetisSupport.h index f2bbef20c..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/MetisSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/MetisSupport/MetisSupport.h @@ -1,137 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef METIS_SUPPORT_H -#define METIS_SUPPORT_H - -namespace Eigen { -/** - * Get the fill-reducing ordering from the METIS package - * - * If A is the original matrix and Ap is the permuted matrix, - * the fill-reducing permutation is defined as follows : - * Row (column) i of A is the matperm(i) row (column) of Ap. 
- * WARNING: As computed by METIS, this corresponds to the vector iperm (instead of perm) - */ -template -class MetisOrdering -{ -public: - typedef PermutationMatrix PermutationType; - typedef Matrix IndexVector; - - template - void get_symmetrized_graph(const MatrixType& A) - { - Index m = A.cols(); - eigen_assert((A.rows() == A.cols()) && "ONLY FOR SQUARED MATRICES"); - // Get the transpose of the input matrix - MatrixType At = A.transpose(); - // Get the number of nonzeros elements in each row/col of At+A - Index TotNz = 0; - IndexVector visited(m); - visited.setConstant(-1); - for (int j = 0; j < m; j++) - { - // Compute the union structure of of A(j,:) and At(j,:) - visited(j) = j; // Do not include the diagonal element - // Get the nonzeros in row/column j of A - for (typename MatrixType::InnerIterator it(A, j); it; ++it) - { - Index idx = it.index(); // Get the row index (for column major) or column index (for row major) - if (visited(idx) != j ) - { - visited(idx) = j; - ++TotNz; - } - } - //Get the nonzeros in row/column j of At - for (typename MatrixType::InnerIterator it(At, j); it; ++it) - { - Index idx = it.index(); - if(visited(idx) != j) - { - visited(idx) = j; - ++TotNz; - } - } - } - // Reserve place for A + At - m_indexPtr.resize(m+1); - m_innerIndices.resize(TotNz); - - // Now compute the real adjacency list of each column/row - visited.setConstant(-1); - Index CurNz = 0; - for (int j = 0; j < m; j++) - { - m_indexPtr(j) = CurNz; - - visited(j) = j; // Do not include the diagonal element - // Add the pattern of row/column j of A to A+At - for (typename MatrixType::InnerIterator it(A,j); it; ++it) - { - Index idx = it.index(); // Get the row index (for column major) or column index (for row major) - if (visited(idx) != j ) - { - visited(idx) = j; - m_innerIndices(CurNz) = idx; - CurNz++; - } - } - //Add the pattern of row/column j of At to A+At - for (typename MatrixType::InnerIterator it(At, j); it; ++it) - { - Index idx = it.index(); - 
if(visited(idx) != j) - { - visited(idx) = j; - m_innerIndices(CurNz) = idx; - ++CurNz; - } - } - } - m_indexPtr(m) = CurNz; - } - - template - void operator() (const MatrixType& A, PermutationType& matperm) - { - Index m = A.cols(); - IndexVector perm(m),iperm(m); - // First, symmetrize the matrix graph. - get_symmetrized_graph(A); - int output_error; - - // Call the fill-reducing routine from METIS - output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data()); - - if(output_error != METIS_OK) - { - //FIXME The ordering interface should define a class of possible errors - std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n"; - return; - } - - // Get the fill-reducing permutation - //NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows - // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap - - matperm.resize(m); - for (int j = 0; j < m; j++) - matperm.indices()(iperm(j)) = j; - - } - - protected: - IndexVector m_indexPtr; // Pointer to the adjacenccy list of each row/column - IndexVector m_innerIndices; // Adjacency list -}; - -}// end namespace eigen -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Amd.h b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Amd.h index 658b954c7..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Amd.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Amd.h @@ -1,444 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud - -/* - -NOTE: this routine has been adapted from the CSparse library: - -Copyright (c) 2006, Timothy A. Davis. 
-http://www.suitesparse.com - -CSparse is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -CSparse is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this Module; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*/ - -#include "../Core/util/NonMPL2.h" - -#ifndef EIGEN_SPARSE_AMD_H -#define EIGEN_SPARSE_AMD_H - -namespace Eigen { - -namespace internal { - -template inline T amd_flip(const T& i) { return -i-2; } -template inline T amd_unflip(const T& i) { return i<0 ? amd_flip(i) : i; } -template inline bool amd_marked(const T0* w, const T1& j) { return w[j]<0; } -template inline void amd_mark(const T0* w, const T1& j) { return w[j] = amd_flip(w[j]); } - -/* clear w */ -template -static int cs_wclear (Index mark, Index lemax, Index *w, Index n) -{ - Index k; - if(mark < 2 || (mark + lemax < 0)) - { - for(k = 0; k < n; k++) - if(w[k] != 0) - w[k] = 1; - mark = 2; - } - return (mark); /* at this point, w[0..n-1] < mark holds */ -} - -/* depth-first search and postorder of a tree rooted at node j */ -template -Index cs_tdfs(Index j, Index k, Index *head, const Index *next, Index *post, Index *stack) -{ - int i, p, top = 0; - if(!head || !next || !post || !stack) return (-1); /* check inputs */ - stack[0] = j; /* place j on the stack */ - while (top >= 0) /* while (stack is not empty) */ - { - p = stack[top]; /* p = top of stack */ - i = head[p]; /* i = youngest child of p */ - if(i == -1) - { - top--; /* p has no unordered children left */ - post[k++] = p; /* 
node p is the kth postordered node */ - } - else - { - head[p] = next[i]; /* remove i from children of p */ - stack[++top] = i; /* start dfs on child node i */ - } - } - return k; -} - - -/** \internal - * \ingroup OrderingMethods_Module - * Approximate minimum degree ordering algorithm. - * \returns the permutation P reducing the fill-in of the input matrix \a C - * The input matrix \a C must be a selfadjoint compressed column major SparseMatrix object. Both the upper and lower parts have to be stored, but the diagonal entries are optional. - * On exit the values of C are destroyed */ -template -void minimum_degree_ordering(SparseMatrix& C, PermutationMatrix& perm) -{ - using std::sqrt; - - int d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1, - k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi, - ok, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, t; - unsigned int h; - - Index n = C.cols(); - dense = std::max (16, Index(10 * sqrt(double(n)))); /* find dense threshold */ - dense = std::min (n-2, dense); - - Index cnz = C.nonZeros(); - perm.resize(n+1); - t = cnz + cnz/5 + 2*n; /* add elbow room to C */ - C.resizeNonZeros(t); - - Index* W = new Index[8*(n+1)]; /* get workspace */ - Index* len = W; - Index* nv = W + (n+1); - Index* next = W + 2*(n+1); - Index* head = W + 3*(n+1); - Index* elen = W + 4*(n+1); - Index* degree = W + 5*(n+1); - Index* w = W + 6*(n+1); - Index* hhead = W + 7*(n+1); - Index* last = perm.indices().data(); /* use P as workspace for last */ - - /* --- Initialize quotient graph ---------------------------------------- */ - Index* Cp = C.outerIndexPtr(); - Index* Ci = C.innerIndexPtr(); - for(k = 0; k < n; k++) - len[k] = Cp[k+1] - Cp[k]; - len[n] = 0; - nzmax = t; - - for(i = 0; i <= n; i++) - { - head[i] = -1; // degree list i is empty - last[i] = -1; - next[i] = -1; - hhead[i] = -1; // hash list i is empty - nv[i] = 1; // node i is just one node - w[i] = 1; // node i is alive - elen[i] = 0; // Ek of node i is 
empty - degree[i] = len[i]; // degree of node i - } - mark = internal::cs_wclear(0, 0, w, n); /* clear w */ - - /* --- Initialize degree lists ------------------------------------------ */ - for(i = 0; i < n; i++) - { - bool has_diag = false; - for(p = Cp[i]; p dense || !has_diag) /* node i is dense or has no structural diagonal element */ - { - nv[i] = 0; /* absorb i into element n */ - elen[i] = -1; /* node i is dead */ - nel++; - Cp[i] = amd_flip (n); - nv[n]++; - } - else - { - if(head[d] != -1) last[head[d]] = i; - next[i] = head[d]; /* put node i in degree list d */ - head[d] = i; - } - } - - elen[n] = -2; /* n is a dead element */ - Cp[n] = -1; /* n is a root of assembly tree */ - w[n] = 0; /* n is a dead element */ - - while (nel < n) /* while (selecting pivots) do */ - { - /* --- Select node of minimum approximate degree -------------------- */ - for(k = -1; mindeg < n && (k = head[mindeg]) == -1; mindeg++) {} - if(next[k] != -1) last[next[k]] = -1; - head[mindeg] = next[k]; /* remove k from degree list */ - elenk = elen[k]; /* elenk = |Ek| */ - nvk = nv[k]; /* # of nodes k represents */ - nel += nvk; /* nv[k] nodes of A eliminated */ - - /* --- Garbage collection ------------------------------------------- */ - if(elenk > 0 && cnz + mindeg >= nzmax) - { - for(j = 0; j < n; j++) - { - if((p = Cp[j]) >= 0) /* j is a live node or element */ - { - Cp[j] = Ci[p]; /* save first entry of object */ - Ci[p] = amd_flip (j); /* first entry is now amd_flip(j) */ - } - } - for(q = 0, p = 0; p < cnz; ) /* scan all of memory */ - { - if((j = amd_flip (Ci[p++])) >= 0) /* found object j */ - { - Ci[q] = Cp[j]; /* restore first entry of object */ - Cp[j] = q++; /* new pointer to object j */ - for(k3 = 0; k3 < len[j]-1; k3++) Ci[q++] = Ci[p++]; - } - } - cnz = q; /* Ci[cnz...nzmax-1] now free */ - } - - /* --- Construct new element ---------------------------------------- */ - dk = 0; - nv[k] = -nvk; /* flag k as in Lk */ - p = Cp[k]; - pk1 = (elenk == 0) ? 
p : cnz; /* do in place if elen[k] == 0 */ - pk2 = pk1; - for(k1 = 1; k1 <= elenk + 1; k1++) - { - if(k1 > elenk) - { - e = k; /* search the nodes in k */ - pj = p; /* list of nodes starts at Ci[pj]*/ - ln = len[k] - elenk; /* length of list of nodes in k */ - } - else - { - e = Ci[p++]; /* search the nodes in e */ - pj = Cp[e]; - ln = len[e]; /* length of list of nodes in e */ - } - for(k2 = 1; k2 <= ln; k2++) - { - i = Ci[pj++]; - if((nvi = nv[i]) <= 0) continue; /* node i dead, or seen */ - dk += nvi; /* degree[Lk] += size of node i */ - nv[i] = -nvi; /* negate nv[i] to denote i in Lk*/ - Ci[pk2++] = i; /* place i in Lk */ - if(next[i] != -1) last[next[i]] = last[i]; - if(last[i] != -1) /* remove i from degree list */ - { - next[last[i]] = next[i]; - } - else - { - head[degree[i]] = next[i]; - } - } - if(e != k) - { - Cp[e] = amd_flip (k); /* absorb e into k */ - w[e] = 0; /* e is now a dead element */ - } - } - if(elenk != 0) cnz = pk2; /* Ci[cnz...nzmax] is free */ - degree[k] = dk; /* external degree of k - |Lk\i| */ - Cp[k] = pk1; /* element k is in Ci[pk1..pk2-1] */ - len[k] = pk2 - pk1; - elen[k] = -2; /* k is now an element */ - - /* --- Find set differences ----------------------------------------- */ - mark = internal::cs_wclear(mark, lemax, w, n); /* clear w if necessary */ - for(pk = pk1; pk < pk2; pk++) /* scan 1: find |Le\Lk| */ - { - i = Ci[pk]; - if((eln = elen[i]) <= 0) continue;/* skip if elen[i] empty */ - nvi = -nv[i]; /* nv[i] was negated */ - wnvi = mark - nvi; - for(p = Cp[i]; p <= Cp[i] + eln - 1; p++) /* scan Ei */ - { - e = Ci[p]; - if(w[e] >= mark) - { - w[e] -= nvi; /* decrement |Le\Lk| */ - } - else if(w[e] != 0) /* ensure e is a live element */ - { - w[e] = degree[e] + wnvi; /* 1st time e seen in scan 1 */ - } - } - } - - /* --- Degree update ------------------------------------------------ */ - for(pk = pk1; pk < pk2; pk++) /* scan2: degree update */ - { - i = Ci[pk]; /* consider node i in Lk */ - p1 = Cp[i]; - p2 = p1 + elen[i] - 
1; - pn = p1; - for(h = 0, d = 0, p = p1; p <= p2; p++) /* scan Ei */ - { - e = Ci[p]; - if(w[e] != 0) /* e is an unabsorbed element */ - { - dext = w[e] - mark; /* dext = |Le\Lk| */ - if(dext > 0) - { - d += dext; /* sum up the set differences */ - Ci[pn++] = e; /* keep e in Ei */ - h += e; /* compute the hash of node i */ - } - else - { - Cp[e] = amd_flip (k); /* aggressive absorb. e->k */ - w[e] = 0; /* e is a dead element */ - } - } - } - elen[i] = pn - p1 + 1; /* elen[i] = |Ei| */ - p3 = pn; - p4 = p1 + len[i]; - for(p = p2 + 1; p < p4; p++) /* prune edges in Ai */ - { - j = Ci[p]; - if((nvj = nv[j]) <= 0) continue; /* node j dead or in Lk */ - d += nvj; /* degree(i) += |j| */ - Ci[pn++] = j; /* place j in node list of i */ - h += j; /* compute hash for node i */ - } - if(d == 0) /* check for mass elimination */ - { - Cp[i] = amd_flip (k); /* absorb i into k */ - nvi = -nv[i]; - dk -= nvi; /* |Lk| -= |i| */ - nvk += nvi; /* |k| += nv[i] */ - nel += nvi; - nv[i] = 0; - elen[i] = -1; /* node i is dead */ - } - else - { - degree[i] = std::min (degree[i], d); /* update degree(i) */ - Ci[pn] = Ci[p3]; /* move first node to end */ - Ci[p3] = Ci[p1]; /* move 1st el. to end of Ei */ - Ci[p1] = k; /* add k as 1st element in of Ei */ - len[i] = pn - p1 + 1; /* new len of adj. 
list of node i */ - h %= n; /* finalize hash of i */ - next[i] = hhead[h]; /* place i in hash bucket */ - hhead[h] = i; - last[i] = h; /* save hash of i in last[i] */ - } - } /* scan2 is done */ - degree[k] = dk; /* finalize |Lk| */ - lemax = std::max(lemax, dk); - mark = internal::cs_wclear(mark+lemax, lemax, w, n); /* clear w */ - - /* --- Supernode detection ------------------------------------------ */ - for(pk = pk1; pk < pk2; pk++) - { - i = Ci[pk]; - if(nv[i] >= 0) continue; /* skip if i is dead */ - h = last[i]; /* scan hash bucket of node i */ - i = hhead[h]; - hhead[h] = -1; /* hash bucket will be empty */ - for(; i != -1 && next[i] != -1; i = next[i], mark++) - { - ln = len[i]; - eln = elen[i]; - for(p = Cp[i]+1; p <= Cp[i] + ln-1; p++) w[Ci[p]] = mark; - jlast = i; - for(j = next[i]; j != -1; ) /* compare i with all j */ - { - ok = (len[j] == ln) && (elen[j] == eln); - for(p = Cp[j] + 1; ok && p <= Cp[j] + ln - 1; p++) - { - if(w[Ci[p]] != mark) ok = 0; /* compare i and j*/ - } - if(ok) /* i and j are identical */ - { - Cp[j] = amd_flip (i); /* absorb j into i */ - nv[i] += nv[j]; - nv[j] = 0; - elen[j] = -1; /* node j is dead */ - j = next[j]; /* delete j from hash bucket */ - next[jlast] = j; - } - else - { - jlast = j; /* j and i are different */ - j = next[j]; - } - } - } - } - - /* --- Finalize new element------------------------------------------ */ - for(p = pk1, pk = pk1; pk < pk2; pk++) /* finalize Lk */ - { - i = Ci[pk]; - if((nvi = -nv[i]) <= 0) continue;/* skip if i is dead */ - nv[i] = nvi; /* restore nv[i] */ - d = degree[i] + dk - nvi; /* compute external degree(i) */ - d = std::min (d, n - nel - nvi); - if(head[d] != -1) last[head[d]] = i; - next[i] = head[d]; /* put i back in degree list */ - last[i] = -1; - head[d] = i; - mindeg = std::min (mindeg, d); /* find new minimum degree */ - degree[i] = d; - Ci[p++] = i; /* place i in Lk */ - } - nv[k] = nvk; /* # nodes absorbed into k */ - if((len[k] = p-pk1) == 0) /* length of adj list of 
element k*/ - { - Cp[k] = -1; /* k is a root of the tree */ - w[k] = 0; /* k is now a dead element */ - } - if(elenk != 0) cnz = p; /* free unused space in Lk */ - } - - /* --- Postordering ----------------------------------------------------- */ - for(i = 0; i < n; i++) Cp[i] = amd_flip (Cp[i]);/* fix assembly tree */ - for(j = 0; j <= n; j++) head[j] = -1; - for(j = n; j >= 0; j--) /* place unordered nodes in lists */ - { - if(nv[j] > 0) continue; /* skip if j is an element */ - next[j] = head[Cp[j]]; /* place j in list of its parent */ - head[Cp[j]] = j; - } - for(e = n; e >= 0; e--) /* place elements in lists */ - { - if(nv[e] <= 0) continue; /* skip unless e is an element */ - if(Cp[e] != -1) - { - next[e] = head[Cp[e]]; /* place e in list of its parent */ - head[Cp[e]] = e; - } - } - for(k = 0, i = 0; i <= n; i++) /* postorder the assembly tree */ - { - if(Cp[i] == -1) k = internal::cs_tdfs(i, k, head, next, perm.indices().data(), w); - } - - perm.indices().conservativeResize(n); - - delete[] W; -} - -} // namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_AMD_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/CMakeLists.txt index 9f4bb2758..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_OrderingMethods_SRCS "*.h") - -INSTALL(FILES - ${Eigen_OrderingMethods_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/OrderingMethods COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Eigen_Colamd.h b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Eigen_Colamd.h index 359fd4417..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -1,1843 +0,0 @@ -// // This file is part of Eigen, a 
lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Desire Nuentsa Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// This file is modified from the colamd/symamd library. The copyright is below - -// The authors of the code itself are Stefan I. Larimore and Timothy A. -// Davis (davis@cise.ufl.edu), University of Florida. The algorithm was -// developed in collaboration with John Gilbert, Xerox PARC, and Esmond -// Ng, Oak Ridge National Laboratory. -// -// Date: -// -// September 8, 2003. Version 2.3. -// -// Acknowledgements: -// -// This work was supported by the National Science Foundation, under -// grants DMS-9504974 and DMS-9803599. -// -// Notice: -// -// Copyright (c) 1998-2003 by the University of Florida. -// All Rights Reserved. -// -// THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY -// EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. -// -// Permission is hereby granted to use, copy, modify, and/or distribute -// this program, provided that the Copyright, this License, and the -// Availability of the original version is retained on all copies and made -// accessible to the end-user of any code or package that includes COLAMD -// or any modified version of COLAMD. -// -// Availability: -// -// The colamd/symamd library is available at -// -// http://www.suitesparse.com - - -#ifndef EIGEN_COLAMD_H -#define EIGEN_COLAMD_H - -namespace internal { -/* Ensure that debugging is turned off: */ -#ifndef COLAMD_NDEBUG -#define COLAMD_NDEBUG -#endif /* NDEBUG */ -/* ========================================================================== */ -/* === Knob and statistics definitions ====================================== */ -/* ========================================================================== */ - -/* size of the knobs [ ] array. 
Only knobs [0..1] are currently used. */ -#define COLAMD_KNOBS 20 - -/* number of output statistics. Only stats [0..6] are currently used. */ -#define COLAMD_STATS 20 - -/* knobs [0] and stats [0]: dense row knob and output statistic. */ -#define COLAMD_DENSE_ROW 0 - -/* knobs [1] and stats [1]: dense column knob and output statistic. */ -#define COLAMD_DENSE_COL 1 - -/* stats [2]: memory defragmentation count output statistic */ -#define COLAMD_DEFRAG_COUNT 2 - -/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ -#define COLAMD_STATUS 3 - -/* stats [4..6]: error info, or info on jumbled columns */ -#define COLAMD_INFO1 4 -#define COLAMD_INFO2 5 -#define COLAMD_INFO3 6 - -/* error codes returned in stats [3]: */ -#define COLAMD_OK (0) -#define COLAMD_OK_BUT_JUMBLED (1) -#define COLAMD_ERROR_A_not_present (-1) -#define COLAMD_ERROR_p_not_present (-2) -#define COLAMD_ERROR_nrow_negative (-3) -#define COLAMD_ERROR_ncol_negative (-4) -#define COLAMD_ERROR_nnz_negative (-5) -#define COLAMD_ERROR_p0_nonzero (-6) -#define COLAMD_ERROR_A_too_small (-7) -#define COLAMD_ERROR_col_length_negative (-8) -#define COLAMD_ERROR_row_index_out_of_bounds (-9) -#define COLAMD_ERROR_out_of_memory (-10) -#define COLAMD_ERROR_internal_error (-999) - -/* ========================================================================== */ -/* === Definitions ========================================================== */ -/* ========================================================================== */ - -#define ONES_COMPLEMENT(r) (-(r)-1) - -/* -------------------------------------------------------------------------- */ - -#define COLAMD_EMPTY (-1) - -/* Row and column status */ -#define ALIVE (0) -#define DEAD (-1) - -/* Column status */ -#define DEAD_PRINCIPAL (-1) -#define DEAD_NON_PRINCIPAL (-2) - -/* Macros for row and column status update and checking. 
*/ -#define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) -#define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) -#define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) -#define COL_IS_DEAD(c) (Col [c].start < ALIVE) -#define COL_IS_ALIVE(c) (Col [c].start >= ALIVE) -#define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) -#define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } -#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } -#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } - -/* ========================================================================== */ -/* === Colamd reporting mechanism =========================================== */ -/* ========================================================================== */ - -// == Row and Column structures == -template -struct colamd_col -{ - Index start ; /* index for A of first row in this column, or DEAD */ - /* if column is dead */ - Index length ; /* number of rows in this column */ - union - { - Index thickness ; /* number of original columns represented by this */ - /* col, if the column is alive */ - Index parent ; /* parent in parent tree super-column structure, if */ - /* the column is dead */ - } shared1 ; - union - { - Index score ; /* the score used to maintain heap, if col is alive */ - Index order ; /* pivot ordering of this column, if col is dead */ - } shared2 ; - union - { - Index headhash ; /* head of a hash bucket, if col is at the head of */ - /* a degree list */ - Index hash ; /* hash value, if col is not in a degree list */ - Index prev ; /* previous column in degree list, if col is in a */ - /* degree list (but not at the head of a degree list) */ - } shared3 ; - union - { - Index degree_next ; /* next column, if col is in a degree list */ - Index hash_next ; /* next column, if col is in a hash list */ - } shared4 ; - -}; - -template -struct Colamd_Row -{ - Index start ; /* index for A of first col in this row */ - Index length ; 
/* number of principal columns in this row */ - union - { - Index degree ; /* number of principal & non-principal columns in row */ - Index p ; /* used as a row pointer in init_rows_cols () */ - } shared1 ; - union - { - Index mark ; /* for computing set differences and marking dead rows*/ - Index first_column ;/* first column in row (used in garbage collection) */ - } shared2 ; - -}; - -/* ========================================================================== */ -/* === Colamd recommended memory size ======================================= */ -/* ========================================================================== */ - -/* - The recommended length Alen of the array A passed to colamd is given by - the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any - argument is negative. 2*nnz space is required for the row and column - indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is - required for the Col and Row arrays, respectively, which are internal to - colamd. An additional n_col space is the minimal amount of "elbow room", - and nnz/5 more space is recommended for run time efficiency. - - This macro is not needed when using symamd. - - Explicit typecast to Index added Sept. 23, 2002, COLAMD version 2.2, to avoid - gcc -pedantic warning messages. 
-*/ -template -inline Index colamd_c(Index n_col) -{ return Index( ((n_col) + 1) * sizeof (colamd_col) / sizeof (Index) ) ; } - -template -inline Index colamd_r(Index n_row) -{ return Index(((n_row) + 1) * sizeof (Colamd_Row) / sizeof (Index)); } - -// Prototypes of non-user callable routines -template -static Index init_rows_cols (Index n_row, Index n_col, Colamd_Row Row [], colamd_col col [], Index A [], Index p [], Index stats[COLAMD_STATS] ); - -template -static void init_scoring (Index n_row, Index n_col, Colamd_Row Row [], colamd_col Col [], Index A [], Index head [], double knobs[COLAMD_KNOBS], Index *p_n_row2, Index *p_n_col2, Index *p_max_deg); - -template -static Index find_ordering (Index n_row, Index n_col, Index Alen, Colamd_Row Row [], colamd_col Col [], Index A [], Index head [], Index n_col2, Index max_deg, Index pfree); - -template -static void order_children (Index n_col, colamd_col Col [], Index p []); - -template -static void detect_super_cols (colamd_col Col [], Index A [], Index head [], Index row_start, Index row_length ) ; - -template -static Index garbage_collection (Index n_row, Index n_col, Colamd_Row Row [], colamd_col Col [], Index A [], Index *pfree) ; - -template -static inline Index clear_mark (Index n_row, Colamd_Row Row [] ) ; - -/* === No debugging ========================================================= */ - -#define COLAMD_DEBUG0(params) ; -#define COLAMD_DEBUG1(params) ; -#define COLAMD_DEBUG2(params) ; -#define COLAMD_DEBUG3(params) ; -#define COLAMD_DEBUG4(params) ; - -#define COLAMD_ASSERT(expression) ((void) 0) - - -/** - * \brief Returns the recommended value of Alen - * - * Returns recommended value of Alen for use by colamd. - * Returns -1 if any input argument is negative. - * The use of this routine or macro is optional. - * Note that the macro uses its arguments more than once, - * so be careful for side effects, if you pass expressions as arguments to COLAMD_RECOMMENDED. 
- * - * \param nnz nonzeros in A - * \param n_row number of rows in A - * \param n_col number of columns in A - * \return recommended value of Alen for use by colamd - */ -template -inline Index colamd_recommended ( Index nnz, Index n_row, Index n_col) -{ - if ((nnz) < 0 || (n_row) < 0 || (n_col) < 0) - return (-1); - else - return (2 * (nnz) + colamd_c (n_col) + colamd_r (n_row) + (n_col) + ((nnz) / 5)); -} - -/** - * \brief set default parameters The use of this routine is optional. - * - * Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) - * entries are removed prior to ordering. Columns with more than - * (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to - * ordering, and placed last in the output column ordering. - * - * COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1, - * respectively, in colamd.h. Default values of these two knobs - * are both 0.5. Currently, only knobs [0] and knobs [1] are - * used, but future versions may use more knobs. If so, they will - * be properly set to their defaults by the future version of - * colamd_set_defaults, so that the code that calls colamd will - * not need to change, assuming that you either use - * colamd_set_defaults, or pass a (double *) NULL pointer as the - * knobs array to colamd or symamd. 
- * - * \param knobs parameter settings for colamd - */ - -static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS]) -{ - /* === Local variables ================================================== */ - - int i ; - - if (!knobs) - { - return ; /* no knobs to initialize */ - } - for (i = 0 ; i < COLAMD_KNOBS ; i++) - { - knobs [i] = 0 ; - } - knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ - knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ -} - -/** - * \brief Computes a column ordering using the column approximate minimum degree ordering - * - * Computes a column ordering (Q) of A such that P(AQ)=LU or - * (AQ)'AQ=LL' have less fill-in and require fewer floating point - * operations than factorizing the unpermuted matrix A or A'A, - * respectively. - * - * - * \param n_row number of rows in A - * \param n_col number of columns in A - * \param Alen, size of the array A - * \param A row indices of the matrix, of size ALen - * \param p column pointers of A, of size n_col+1 - * \param knobs parameter settings for colamd - * \param stats colamd output statistics and error codes - */ -template -static bool colamd(Index n_row, Index n_col, Index Alen, Index *A, Index *p, double knobs[COLAMD_KNOBS], Index stats[COLAMD_STATS]) -{ - /* === Local variables ================================================== */ - - Index i ; /* loop index */ - Index nnz ; /* nonzeros in A */ - Index Row_size ; /* size of Row [], in integers */ - Index Col_size ; /* size of Col [], in integers */ - Index need ; /* minimum required length of A */ - Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ - colamd_col *Col ; /* pointer into A of Col [0..n_col] array */ - Index n_col2 ; /* number of non-dense, non-empty columns */ - Index n_row2 ; /* number of non-dense, non-empty rows */ - Index ngarbage ; /* number of garbage collections performed */ - Index max_deg ; /* maximum row degree */ - double default_knobs [COLAMD_KNOBS] ; /* default 
knobs array */ - - - /* === Check the input arguments ======================================== */ - - if (!stats) - { - COLAMD_DEBUG0 (("colamd: stats not present\n")) ; - return (false) ; - } - for (i = 0 ; i < COLAMD_STATS ; i++) - { - stats [i] = 0 ; - } - stats [COLAMD_STATUS] = COLAMD_OK ; - stats [COLAMD_INFO1] = -1 ; - stats [COLAMD_INFO2] = -1 ; - - if (!A) /* A is not present */ - { - stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; - COLAMD_DEBUG0 (("colamd: A not present\n")) ; - return (false) ; - } - - if (!p) /* p is not present */ - { - stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; - COLAMD_DEBUG0 (("colamd: p not present\n")) ; - return (false) ; - } - - if (n_row < 0) /* n_row must be >= 0 */ - { - stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ; - stats [COLAMD_INFO1] = n_row ; - COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ; - return (false) ; - } - - if (n_col < 0) /* n_col must be >= 0 */ - { - stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; - stats [COLAMD_INFO1] = n_col ; - COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ; - return (false) ; - } - - nnz = p [n_col] ; - if (nnz < 0) /* nnz must be >= 0 */ - { - stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; - stats [COLAMD_INFO1] = nnz ; - COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ; - return (false) ; - } - - if (p [0] != 0) - { - stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; - stats [COLAMD_INFO1] = p [0] ; - COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ; - return (false) ; - } - - /* === If no knobs, set default knobs =================================== */ - - if (!knobs) - { - colamd_set_defaults (default_knobs) ; - knobs = default_knobs ; - } - - /* === Allocate the Row and Col arrays from array A ===================== */ - - Col_size = colamd_c (n_col) ; - Row_size = colamd_r (n_row) ; - need = 2*nnz + n_col + Col_size + Row_size ; - - if (need > Alen) - { - /* not enough space in array A to perform the ordering 
*/ - stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ; - stats [COLAMD_INFO1] = need ; - stats [COLAMD_INFO2] = Alen ; - COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); - return (false) ; - } - - Alen -= Col_size + Row_size ; - Col = (colamd_col *) &A [Alen] ; - Row = (Colamd_Row *) &A [Alen + Col_size] ; - - /* === Construct the row and column data structures ===================== */ - - if (!Eigen::internal::init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) - { - /* input matrix is invalid */ - COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ; - return (false) ; - } - - /* === Initialize scores, kill dense rows/columns ======================= */ - - Eigen::internal::init_scoring (n_row, n_col, Row, Col, A, p, knobs, - &n_row2, &n_col2, &max_deg) ; - - /* === Order the supercolumns =========================================== */ - - ngarbage = Eigen::internal::find_ordering (n_row, n_col, Alen, Row, Col, A, p, - n_col2, max_deg, 2*nnz) ; - - /* === Order the non-principal columns ================================== */ - - Eigen::internal::order_children (n_col, Col, p) ; - - /* === Return statistics in stats ======================================= */ - - stats [COLAMD_DENSE_ROW] = n_row - n_row2 ; - stats [COLAMD_DENSE_COL] = n_col - n_col2 ; - stats [COLAMD_DEFRAG_COUNT] = ngarbage ; - COLAMD_DEBUG0 (("colamd: done.\n")) ; - return (true) ; -} - -/* ========================================================================== */ -/* === NON-USER-CALLABLE ROUTINES: ========================================== */ -/* ========================================================================== */ - -/* There are no user-callable routines beyond this point in the file */ - - -/* ========================================================================== */ -/* === init_rows_cols ======================================================= */ -/* ========================================================================== */ - -/* - Takes the column 
form of the matrix in A and creates the row form of the - matrix. Also, row and column attributes are stored in the Col and Row - structs. If the columns are un-sorted or contain duplicate row indices, - this routine will also sort and remove duplicate row indices from the - column form of the matrix. Returns false if the matrix is invalid, - true otherwise. Not user-callable. -*/ -template -static Index init_rows_cols /* returns true if OK, or false otherwise */ - ( - /* === Parameters ======================================================= */ - - Index n_row, /* number of rows of A */ - Index n_col, /* number of columns of A */ - Colamd_Row Row [], /* of size n_row+1 */ - colamd_col Col [], /* of size n_col+1 */ - Index A [], /* row indices of A, of size Alen */ - Index p [], /* pointers to columns in A, of size n_col+1 */ - Index stats [COLAMD_STATS] /* colamd statistics */ - ) -{ - /* === Local variables ================================================== */ - - Index col ; /* a column index */ - Index row ; /* a row index */ - Index *cp ; /* a column pointer */ - Index *cp_end ; /* a pointer to the end of a column */ - Index *rp ; /* a row pointer */ - Index *rp_end ; /* a pointer to the end of a row */ - Index last_row ; /* previous row */ - - /* === Initialize columns, and check column pointers ==================== */ - - for (col = 0 ; col < n_col ; col++) - { - Col [col].start = p [col] ; - Col [col].length = p [col+1] - p [col] ; - - if ((Col [col].length) < 0) // extra parentheses to work-around gcc bug 10200 - { - /* column pointers must be non-decreasing */ - stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; - stats [COLAMD_INFO1] = col ; - stats [COLAMD_INFO2] = Col [col].length ; - COLAMD_DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ; - return (false) ; - } - - Col [col].shared1.thickness = 1 ; - Col [col].shared2.score = 0 ; - Col [col].shared3.prev = COLAMD_EMPTY ; - Col [col].shared4.degree_next = COLAMD_EMPTY ; - } - 
- /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ - - /* === Scan columns, compute row degrees, and check row indices ========= */ - - stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ - - for (row = 0 ; row < n_row ; row++) - { - Row [row].length = 0 ; - Row [row].shared2.mark = -1 ; - } - - for (col = 0 ; col < n_col ; col++) - { - last_row = -1 ; - - cp = &A [p [col]] ; - cp_end = &A [p [col+1]] ; - - while (cp < cp_end) - { - row = *cp++ ; - - /* make sure row indices within range */ - if (row < 0 || row >= n_row) - { - stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; - stats [COLAMD_INFO1] = col ; - stats [COLAMD_INFO2] = row ; - stats [COLAMD_INFO3] = n_row ; - COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ; - return (false) ; - } - - if (row <= last_row || Row [row].shared2.mark == col) - { - /* row index are unsorted or repeated (or both), thus col */ - /* is jumbled. This is a notice, not an error condition. 
*/ - stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; - stats [COLAMD_INFO1] = col ; - stats [COLAMD_INFO2] = row ; - (stats [COLAMD_INFO3]) ++ ; - COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col)); - } - - if (Row [row].shared2.mark != col) - { - Row [row].length++ ; - } - else - { - /* this is a repeated entry in the column, */ - /* it will be removed */ - Col [col].length-- ; - } - - /* mark the row as having been seen in this column */ - Row [row].shared2.mark = col ; - - last_row = row ; - } - } - - /* === Compute row pointers ============================================= */ - - /* row form of the matrix starts directly after the column */ - /* form of matrix in A */ - Row [0].start = p [n_col] ; - Row [0].shared1.p = Row [0].start ; - Row [0].shared2.mark = -1 ; - for (row = 1 ; row < n_row ; row++) - { - Row [row].start = Row [row-1].start + Row [row-1].length ; - Row [row].shared1.p = Row [row].start ; - Row [row].shared2.mark = -1 ; - } - - /* === Create row form ================================================== */ - - if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) - { - /* if cols jumbled, watch for repeated row indices */ - for (col = 0 ; col < n_col ; col++) - { - cp = &A [p [col]] ; - cp_end = &A [p [col+1]] ; - while (cp < cp_end) - { - row = *cp++ ; - if (Row [row].shared2.mark != col) - { - A [(Row [row].shared1.p)++] = col ; - Row [row].shared2.mark = col ; - } - } - } - } - else - { - /* if cols not jumbled, we don't need the mark (this is faster) */ - for (col = 0 ; col < n_col ; col++) - { - cp = &A [p [col]] ; - cp_end = &A [p [col+1]] ; - while (cp < cp_end) - { - A [(Row [*cp++].shared1.p)++] = col ; - } - } - } - - /* === Clear the row marks and set row degrees ========================== */ - - for (row = 0 ; row < n_row ; row++) - { - Row [row].shared2.mark = 0 ; - Row [row].shared1.degree = Row [row].length ; - } - - /* === See if we need to re-create columns ============================== */ - - if (stats 
[COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) - { - COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ; - - - /* === Compute col pointers ========================================= */ - - /* col form of the matrix starts at A [0]. */ - /* Note, we may have a gap between the col form and the row */ - /* form if there were duplicate entries, if so, it will be */ - /* removed upon the first garbage collection */ - Col [0].start = 0 ; - p [0] = Col [0].start ; - for (col = 1 ; col < n_col ; col++) - { - /* note that the lengths here are for pruned columns, i.e. */ - /* no duplicate row indices will exist for these columns */ - Col [col].start = Col [col-1].start + Col [col-1].length ; - p [col] = Col [col].start ; - } - - /* === Re-create col form =========================================== */ - - for (row = 0 ; row < n_row ; row++) - { - rp = &A [Row [row].start] ; - rp_end = rp + Row [row].length ; - while (rp < rp_end) - { - A [(p [*rp++])++] = row ; - } - } - } - - /* === Done. Matrix is not (or no longer) jumbled ====================== */ - - return (true) ; -} - - -/* ========================================================================== */ -/* === init_scoring ========================================================= */ -/* ========================================================================== */ - -/* - Kills dense or empty columns and rows, calculates an initial score for - each column, and places all columns in the degree lists. Not user-callable. 
-*/ -template -static void init_scoring - ( - /* === Parameters ======================================================= */ - - Index n_row, /* number of rows of A */ - Index n_col, /* number of columns of A */ - Colamd_Row Row [], /* of size n_row+1 */ - colamd_col Col [], /* of size n_col+1 */ - Index A [], /* column form and row form of A */ - Index head [], /* of size n_col+1 */ - double knobs [COLAMD_KNOBS],/* parameters */ - Index *p_n_row2, /* number of non-dense, non-empty rows */ - Index *p_n_col2, /* number of non-dense, non-empty columns */ - Index *p_max_deg /* maximum row degree */ - ) -{ - /* === Local variables ================================================== */ - - Index c ; /* a column index */ - Index r, row ; /* a row index */ - Index *cp ; /* a column pointer */ - Index deg ; /* degree of a row or column */ - Index *cp_end ; /* a pointer to the end of a column */ - Index *new_cp ; /* new column pointer */ - Index col_length ; /* length of pruned column */ - Index score ; /* current column score */ - Index n_col2 ; /* number of non-dense, non-empty columns */ - Index n_row2 ; /* number of non-dense, non-empty rows */ - Index dense_row_count ; /* remove rows with more entries than this */ - Index dense_col_count ; /* remove cols with more entries than this */ - Index min_score ; /* smallest column score */ - Index max_deg ; /* maximum row degree */ - Index next_col ; /* Used to add to degree list.*/ - - - /* === Extract knobs ==================================================== */ - - dense_row_count = std::max(0, (std::min)(Index(knobs [COLAMD_DENSE_ROW] * n_col), n_col)) ; - dense_col_count = std::max(0, (std::min)(Index(knobs [COLAMD_DENSE_COL] * n_row), n_row)) ; - COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; - max_deg = 0 ; - n_col2 = n_col ; - n_row2 = n_row ; - - /* === Kill empty columns =============================================== */ - - /* Put the empty columns at the end in their natural order, 
so that LU */ - /* factorization can proceed as far as possible. */ - for (c = n_col-1 ; c >= 0 ; c--) - { - deg = Col [c].length ; - if (deg == 0) - { - /* this is a empty column, kill and order it last */ - Col [c].shared2.order = --n_col2 ; - KILL_PRINCIPAL_COL (c) ; - } - } - COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ; - - /* === Kill dense columns =============================================== */ - - /* Put the dense columns at the end, in their natural order */ - for (c = n_col-1 ; c >= 0 ; c--) - { - /* skip any dead columns */ - if (COL_IS_DEAD (c)) - { - continue ; - } - deg = Col [c].length ; - if (deg > dense_col_count) - { - /* this is a dense column, kill and order it last */ - Col [c].shared2.order = --n_col2 ; - /* decrement the row degrees */ - cp = &A [Col [c].start] ; - cp_end = cp + Col [c].length ; - while (cp < cp_end) - { - Row [*cp++].shared1.degree-- ; - } - KILL_PRINCIPAL_COL (c) ; - } - } - COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; - - /* === Kill dense and empty rows ======================================== */ - - for (r = 0 ; r < n_row ; r++) - { - deg = Row [r].shared1.degree ; - COLAMD_ASSERT (deg >= 0 && deg <= n_col) ; - if (deg > dense_row_count || deg == 0) - { - /* kill a dense or empty row */ - KILL_ROW (r) ; - --n_row2 ; - } - else - { - /* keep track of max degree of remaining rows */ - max_deg = (std::max)(max_deg, deg) ; - } - } - COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; - - /* === Compute initial column scores ==================================== */ - - /* At this point the row degrees are accurate. They reflect the number */ - /* of "live" (non-dense) columns in each row. No empty rows exist. */ - /* Some "live" columns may contain only dead rows, however. These are */ - /* pruned in the code below. 
*/ - - /* now find the initial matlab score for each column */ - for (c = n_col-1 ; c >= 0 ; c--) - { - /* skip dead column */ - if (COL_IS_DEAD (c)) - { - continue ; - } - score = 0 ; - cp = &A [Col [c].start] ; - new_cp = cp ; - cp_end = cp + Col [c].length ; - while (cp < cp_end) - { - /* get a row */ - row = *cp++ ; - /* skip if dead */ - if (ROW_IS_DEAD (row)) - { - continue ; - } - /* compact the column */ - *new_cp++ = row ; - /* add row's external degree */ - score += Row [row].shared1.degree - 1 ; - /* guard against integer overflow */ - score = (std::min)(score, n_col) ; - } - /* determine pruned column length */ - col_length = (Index) (new_cp - &A [Col [c].start]) ; - if (col_length == 0) - { - /* a newly-made null column (all rows in this col are "dense" */ - /* and have already been killed) */ - COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ; - Col [c].shared2.order = --n_col2 ; - KILL_PRINCIPAL_COL (c) ; - } - else - { - /* set column length and set score */ - COLAMD_ASSERT (score >= 0) ; - COLAMD_ASSERT (score <= n_col) ; - Col [c].length = col_length ; - Col [c].shared2.score = score ; - } - } - COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n", - n_col-n_col2)) ; - - /* At this point, all empty rows and columns are dead. All live columns */ - /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ - /* yet). Rows may contain dead columns, but all live rows contain at */ - /* least one live column. */ - - /* === Initialize degree lists ========================================== */ - - - /* clear the hash buckets */ - for (c = 0 ; c <= n_col ; c++) - { - head [c] = COLAMD_EMPTY ; - } - min_score = n_col ; - /* place in reverse order, so low column indices are at the front */ - /* of the lists. 
This is to encourage natural tie-breaking */ - for (c = n_col-1 ; c >= 0 ; c--) - { - /* only add principal columns to degree lists */ - if (COL_IS_ALIVE (c)) - { - COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n", - c, Col [c].shared2.score, min_score, n_col)) ; - - /* === Add columns score to DList =============================== */ - - score = Col [c].shared2.score ; - - COLAMD_ASSERT (min_score >= 0) ; - COLAMD_ASSERT (min_score <= n_col) ; - COLAMD_ASSERT (score >= 0) ; - COLAMD_ASSERT (score <= n_col) ; - COLAMD_ASSERT (head [score] >= COLAMD_EMPTY) ; - - /* now add this column to dList at proper score location */ - next_col = head [score] ; - Col [c].shared3.prev = COLAMD_EMPTY ; - Col [c].shared4.degree_next = next_col ; - - /* if there already was a column with the same score, set its */ - /* previous pointer to this new column */ - if (next_col != COLAMD_EMPTY) - { - Col [next_col].shared3.prev = c ; - } - head [score] = c ; - - /* see if this score is less than current min */ - min_score = (std::min)(min_score, score) ; - - - } - } - - - /* === Return number of remaining columns, and max row degree =========== */ - - *p_n_col2 = n_col2 ; - *p_n_row2 = n_row2 ; - *p_max_deg = max_deg ; -} - - -/* ========================================================================== */ -/* === find_ordering ======================================================== */ -/* ========================================================================== */ - -/* - Order the principal columns of the supercolumn form of the matrix - (no supercolumns on input). Uses a minimum approximate column minimum - degree ordering method. Not user-callable. 
-*/ -template -static Index find_ordering /* return the number of garbage collections */ - ( - /* === Parameters ======================================================= */ - - Index n_row, /* number of rows of A */ - Index n_col, /* number of columns of A */ - Index Alen, /* size of A, 2*nnz + n_col or larger */ - Colamd_Row Row [], /* of size n_row+1 */ - colamd_col Col [], /* of size n_col+1 */ - Index A [], /* column form and row form of A */ - Index head [], /* of size n_col+1 */ - Index n_col2, /* Remaining columns to order */ - Index max_deg, /* Maximum row degree */ - Index pfree /* index of first free slot (2*nnz on entry) */ - ) -{ - /* === Local variables ================================================== */ - - Index k ; /* current pivot ordering step */ - Index pivot_col ; /* current pivot column */ - Index *cp ; /* a column pointer */ - Index *rp ; /* a row pointer */ - Index pivot_row ; /* current pivot row */ - Index *new_cp ; /* modified column pointer */ - Index *new_rp ; /* modified row pointer */ - Index pivot_row_start ; /* pointer to start of pivot row */ - Index pivot_row_degree ; /* number of columns in pivot row */ - Index pivot_row_length ; /* number of supercolumns in pivot row */ - Index pivot_col_score ; /* score of pivot column */ - Index needed_memory ; /* free space needed for pivot row */ - Index *cp_end ; /* pointer to the end of a column */ - Index *rp_end ; /* pointer to the end of a row */ - Index row ; /* a row index */ - Index col ; /* a column index */ - Index max_score ; /* maximum possible score */ - Index cur_score ; /* score of current column */ - unsigned int hash ; /* hash value for supernode detection */ - Index head_column ; /* head of hash bucket */ - Index first_col ; /* first column in hash bucket */ - Index tag_mark ; /* marker value for mark array */ - Index row_mark ; /* Row [row].shared2.mark */ - Index set_difference ; /* set difference size of row with pivot row */ - Index min_score ; /* smallest column score 
*/ - Index col_thickness ; /* "thickness" (no. of columns in a supercol) */ - Index max_mark ; /* maximum value of tag_mark */ - Index pivot_col_thickness ; /* number of columns represented by pivot col */ - Index prev_col ; /* Used by Dlist operations. */ - Index next_col ; /* Used by Dlist operations. */ - Index ngarbage ; /* number of garbage collections performed */ - - - /* === Initialization and clear mark ==================================== */ - - max_mark = INT_MAX - n_col ; /* INT_MAX defined in */ - tag_mark = Eigen::internal::clear_mark (n_row, Row) ; - min_score = 0 ; - ngarbage = 0 ; - COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ; - - /* === Order the columns ================================================ */ - - for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) - { - - /* === Select pivot column, and order it ============================ */ - - /* make sure degree list isn't empty */ - COLAMD_ASSERT (min_score >= 0) ; - COLAMD_ASSERT (min_score <= n_col) ; - COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; - - /* get pivot column from head of minimum degree list */ - while (head [min_score] == COLAMD_EMPTY && min_score < n_col) - { - min_score++ ; - } - pivot_col = head [min_score] ; - COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; - next_col = Col [pivot_col].shared4.degree_next ; - head [min_score] = next_col ; - if (next_col != COLAMD_EMPTY) - { - Col [next_col].shared3.prev = COLAMD_EMPTY ; - } - - COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ; - COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ; - - /* remember score for defrag check */ - pivot_col_score = Col [pivot_col].shared2.score ; - - /* the pivot column is the kth column in the pivot order */ - Col [pivot_col].shared2.order = k ; - - /* increment order count by column thickness */ - pivot_col_thickness = Col [pivot_col].shared1.thickness ; - k += pivot_col_thickness ; - COLAMD_ASSERT (pivot_col_thickness > 0) ; - - /* === Garbage_collection, if necessary 
============================= */ - - needed_memory = (std::min)(pivot_col_score, n_col - k) ; - if (pfree + needed_memory >= Alen) - { - pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; - ngarbage++ ; - /* after garbage collection we will have enough */ - COLAMD_ASSERT (pfree + needed_memory < Alen) ; - /* garbage collection has wiped out the Row[].shared2.mark array */ - tag_mark = Eigen::internal::clear_mark (n_row, Row) ; - - } - - /* === Compute pivot row pattern ==================================== */ - - /* get starting location for this new merged row */ - pivot_row_start = pfree ; - - /* initialize new row counts to zero */ - pivot_row_degree = 0 ; - - /* tag pivot column as having been visited so it isn't included */ - /* in merged pivot row */ - Col [pivot_col].shared1.thickness = -pivot_col_thickness ; - - /* pivot row is the union of all rows in the pivot column pattern */ - cp = &A [Col [pivot_col].start] ; - cp_end = cp + Col [pivot_col].length ; - while (cp < cp_end) - { - /* get a row */ - row = *cp++ ; - COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ; - /* skip if row is dead */ - if (ROW_IS_DEAD (row)) - { - continue ; - } - rp = &A [Row [row].start] ; - rp_end = rp + Row [row].length ; - while (rp < rp_end) - { - /* get a column */ - col = *rp++ ; - /* add the column, if alive and untagged */ - col_thickness = Col [col].shared1.thickness ; - if (col_thickness > 0 && COL_IS_ALIVE (col)) - { - /* tag column in pivot row */ - Col [col].shared1.thickness = -col_thickness ; - COLAMD_ASSERT (pfree < Alen) ; - /* place column in pivot row */ - A [pfree++] = col ; - pivot_row_degree += col_thickness ; - } - } - } - - /* clear tag on pivot column */ - Col [pivot_col].shared1.thickness = pivot_col_thickness ; - max_deg = (std::max)(max_deg, pivot_row_degree) ; - - - /* === Kill all rows used to construct pivot row ==================== */ - - /* also kill pivot row, temporarily */ - cp = &A [Col 
[pivot_col].start] ; - cp_end = cp + Col [pivot_col].length ; - while (cp < cp_end) - { - /* may be killing an already dead row */ - row = *cp++ ; - COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ; - KILL_ROW (row) ; - } - - /* === Select a row index to use as the new pivot row =============== */ - - pivot_row_length = pfree - pivot_row_start ; - if (pivot_row_length > 0) - { - /* pick the "pivot" row arbitrarily (first row in col) */ - pivot_row = A [Col [pivot_col].start] ; - COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; - } - else - { - /* there is no pivot row, since it is of zero length */ - pivot_row = COLAMD_EMPTY ; - COLAMD_ASSERT (pivot_row_length == 0) ; - } - COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; - - /* === Approximate degree computation =============================== */ - - /* Here begins the computation of the approximate degree. The column */ - /* score is the sum of the pivot row "length", plus the size of the */ - /* set differences of each row in the column minus the pattern of the */ - /* pivot row itself. The column ("thickness") itself is also */ - /* excluded from the column score (we thus use an approximate */ - /* external degree). */ - - /* The time taken by the following code (compute set differences, and */ - /* add them up) is proportional to the size of the data structure */ - /* being scanned - that is, the sum of the sizes of each column in */ - /* the pivot row. Thus, the amortized time to compute a column score */ - /* is proportional to the size of that column (where size, in this */ - /* context, is the column "length", or the number of row indices */ - /* in that column). The number of row indices in a column is */ - /* monotonically non-decreasing, from the length of the original */ - /* column on input to colamd. */ - - /* === Compute set differences ====================================== */ - - COLAMD_DEBUG3 (("** Computing set differences phase. 
**\n")) ; - - /* pivot row is currently dead - it will be revived later. */ - - COLAMD_DEBUG3 (("Pivot row: ")) ; - /* for each column in pivot row */ - rp = &A [pivot_row_start] ; - rp_end = rp + pivot_row_length ; - while (rp < rp_end) - { - col = *rp++ ; - COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; - COLAMD_DEBUG3 (("Col: %d\n", col)) ; - - /* clear tags used to construct pivot row pattern */ - col_thickness = -Col [col].shared1.thickness ; - COLAMD_ASSERT (col_thickness > 0) ; - Col [col].shared1.thickness = col_thickness ; - - /* === Remove column from degree list =========================== */ - - cur_score = Col [col].shared2.score ; - prev_col = Col [col].shared3.prev ; - next_col = Col [col].shared4.degree_next ; - COLAMD_ASSERT (cur_score >= 0) ; - COLAMD_ASSERT (cur_score <= n_col) ; - COLAMD_ASSERT (cur_score >= COLAMD_EMPTY) ; - if (prev_col == COLAMD_EMPTY) - { - head [cur_score] = next_col ; - } - else - { - Col [prev_col].shared4.degree_next = next_col ; - } - if (next_col != COLAMD_EMPTY) - { - Col [next_col].shared3.prev = prev_col ; - } - - /* === Scan the column ========================================== */ - - cp = &A [Col [col].start] ; - cp_end = cp + Col [col].length ; - while (cp < cp_end) - { - /* get a row */ - row = *cp++ ; - row_mark = Row [row].shared2.mark ; - /* skip if dead */ - if (ROW_IS_MARKED_DEAD (row_mark)) - { - continue ; - } - COLAMD_ASSERT (row != pivot_row) ; - set_difference = row_mark - tag_mark ; - /* check if the row has been seen yet */ - if (set_difference < 0) - { - COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; - set_difference = Row [row].shared1.degree ; - } - /* subtract column thickness from this row's set difference */ - set_difference -= col_thickness ; - COLAMD_ASSERT (set_difference >= 0) ; - /* absorb this row if the set difference becomes zero */ - if (set_difference == 0) - { - COLAMD_DEBUG3 (("aggressive absorption. 
Row: %d\n", row)) ; - KILL_ROW (row) ; - } - else - { - /* save the new mark */ - Row [row].shared2.mark = set_difference + tag_mark ; - } - } - } - - - /* === Add up set differences for each column ======================= */ - - COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ; - - /* for each column in pivot row */ - rp = &A [pivot_row_start] ; - rp_end = rp + pivot_row_length ; - while (rp < rp_end) - { - /* get a column */ - col = *rp++ ; - COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; - hash = 0 ; - cur_score = 0 ; - cp = &A [Col [col].start] ; - /* compact the column */ - new_cp = cp ; - cp_end = cp + Col [col].length ; - - COLAMD_DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; - - while (cp < cp_end) - { - /* get a row */ - row = *cp++ ; - COLAMD_ASSERT(row >= 0 && row < n_row) ; - row_mark = Row [row].shared2.mark ; - /* skip if dead */ - if (ROW_IS_MARKED_DEAD (row_mark)) - { - continue ; - } - COLAMD_ASSERT (row_mark > tag_mark) ; - /* compact the column */ - *new_cp++ = row ; - /* compute hash function */ - hash += row ; - /* add set difference */ - cur_score += row_mark - tag_mark ; - /* integer overflow... */ - cur_score = (std::min)(cur_score, n_col) ; - } - - /* recompute the column's length */ - Col [col].length = (Index) (new_cp - &A [Col [col].start]) ; - - /* === Further mass elimination ================================= */ - - if (Col [col].length == 0) - { - COLAMD_DEBUG4 (("further mass elimination. 
Col: %d\n", col)) ; - /* nothing left but the pivot row in this column */ - KILL_PRINCIPAL_COL (col) ; - pivot_row_degree -= Col [col].shared1.thickness ; - COLAMD_ASSERT (pivot_row_degree >= 0) ; - /* order it */ - Col [col].shared2.order = k ; - /* increment order count by column thickness */ - k += Col [col].shared1.thickness ; - } - else - { - /* === Prepare for supercolumn detection ==================== */ - - COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; - - /* save score so far */ - Col [col].shared2.score = cur_score ; - - /* add column to hash table, for supercolumn detection */ - hash %= n_col + 1 ; - - COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; - COLAMD_ASSERT (hash <= n_col) ; - - head_column = head [hash] ; - if (head_column > COLAMD_EMPTY) - { - /* degree list "hash" is non-empty, use prev (shared3) of */ - /* first column in degree list as head of hash bucket */ - first_col = Col [head_column].shared3.headhash ; - Col [head_column].shared3.headhash = col ; - } - else - { - /* degree list "hash" is empty, use head as hash bucket */ - first_col = - (head_column + 2) ; - head [hash] = - (col + 2) ; - } - Col [col].shared4.hash_next = first_col ; - - /* save hash function in Col [col].shared3.hash */ - Col [col].shared3.hash = (Index) hash ; - COLAMD_ASSERT (COL_IS_ALIVE (col)) ; - } - } - - /* The approximate external column degree is now computed. */ - - /* === Supercolumn detection ======================================== */ - - COLAMD_DEBUG3 (("** Supercolumn detection phase. 
**\n")) ; - - Eigen::internal::detect_super_cols (Col, A, head, pivot_row_start, pivot_row_length) ; - - /* === Kill the pivotal column ====================================== */ - - KILL_PRINCIPAL_COL (pivot_col) ; - - /* === Clear mark =================================================== */ - - tag_mark += (max_deg + 1) ; - if (tag_mark >= max_mark) - { - COLAMD_DEBUG2 (("clearing tag_mark\n")) ; - tag_mark = Eigen::internal::clear_mark (n_row, Row) ; - } - - /* === Finalize the new pivot row, and column scores ================ */ - - COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ; - - /* for each column in pivot row */ - rp = &A [pivot_row_start] ; - /* compact the pivot row */ - new_rp = rp ; - rp_end = rp + pivot_row_length ; - while (rp < rp_end) - { - col = *rp++ ; - /* skip dead columns */ - if (COL_IS_DEAD (col)) - { - continue ; - } - *new_rp++ = col ; - /* add new pivot row to column */ - A [Col [col].start + (Col [col].length++)] = pivot_row ; - - /* retrieve score so far and add on pivot row's degree. */ - /* (we wait until here for this in case the pivot */ - /* row's degree was reduced due to mass elimination). 
*/ - cur_score = Col [col].shared2.score + pivot_row_degree ; - - /* calculate the max possible score as the number of */ - /* external columns minus the 'k' value minus the */ - /* columns thickness */ - max_score = n_col - k - Col [col].shared1.thickness ; - - /* make the score the external degree of the union-of-rows */ - cur_score -= Col [col].shared1.thickness ; - - /* make sure score is less or equal than the max score */ - cur_score = (std::min)(cur_score, max_score) ; - COLAMD_ASSERT (cur_score >= 0) ; - - /* store updated score */ - Col [col].shared2.score = cur_score ; - - /* === Place column back in degree list ========================= */ - - COLAMD_ASSERT (min_score >= 0) ; - COLAMD_ASSERT (min_score <= n_col) ; - COLAMD_ASSERT (cur_score >= 0) ; - COLAMD_ASSERT (cur_score <= n_col) ; - COLAMD_ASSERT (head [cur_score] >= COLAMD_EMPTY) ; - next_col = head [cur_score] ; - Col [col].shared4.degree_next = next_col ; - Col [col].shared3.prev = COLAMD_EMPTY ; - if (next_col != COLAMD_EMPTY) - { - Col [next_col].shared3.prev = col ; - } - head [cur_score] = col ; - - /* see if this score is less than current min */ - min_score = (std::min)(min_score, cur_score) ; - - } - - /* === Resurrect the new pivot row ================================== */ - - if (pivot_row_degree > 0) - { - /* update pivot row length to reflect any cols that were killed */ - /* during super-col detection and mass elimination */ - Row [pivot_row].start = pivot_row_start ; - Row [pivot_row].length = (Index) (new_rp - &A[pivot_row_start]) ; - Row [pivot_row].shared1.degree = pivot_row_degree ; - Row [pivot_row].shared2.mark = 0 ; - /* pivot row is no longer dead */ - } - } - - /* === All principal columns have now been ordered ====================== */ - - return (ngarbage) ; -} - - -/* ========================================================================== */ -/* === order_children ======================================================= */ -/* 
========================================================================== */ - -/* - The find_ordering routine has ordered all of the principal columns (the - representatives of the supercolumns). The non-principal columns have not - yet been ordered. This routine orders those columns by walking up the - parent tree (a column is a child of the column which absorbed it). The - final permutation vector is then placed in p [0 ... n_col-1], with p [0] - being the first column, and p [n_col-1] being the last. It doesn't look - like it at first glance, but be assured that this routine takes time linear - in the number of columns. Although not immediately obvious, the time - taken by this routine is O (n_col), that is, linear in the number of - columns. Not user-callable. -*/ -template -static inline void order_children -( - /* === Parameters ======================================================= */ - - Index n_col, /* number of columns of A */ - colamd_col Col [], /* of size n_col+1 */ - Index p [] /* p [0 ... n_col-1] is the column permutation*/ - ) -{ - /* === Local variables ================================================== */ - - Index i ; /* loop counter for all columns */ - Index c ; /* column index */ - Index parent ; /* index of column's parent */ - Index order ; /* column's order */ - - /* === Order each non-principal column ================================== */ - - for (i = 0 ; i < n_col ; i++) - { - /* find an un-ordered non-principal column */ - COLAMD_ASSERT (COL_IS_DEAD (i)) ; - if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY) - { - parent = i ; - /* once found, find its principal parent */ - do - { - parent = Col [parent].shared1.parent ; - } while (!COL_IS_DEAD_PRINCIPAL (parent)) ; - - /* now, order all un-ordered non-principal columns along path */ - /* to this parent. 
collapse tree at the same time */ - c = i ; - /* get order of parent */ - order = Col [parent].shared2.order ; - - do - { - COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ; - - /* order this column */ - Col [c].shared2.order = order++ ; - /* collaps tree */ - Col [c].shared1.parent = parent ; - - /* get immediate parent of this column */ - c = Col [c].shared1.parent ; - - /* continue until we hit an ordered column. There are */ - /* guarranteed not to be anymore unordered columns */ - /* above an ordered column */ - } while (Col [c].shared2.order == COLAMD_EMPTY) ; - - /* re-order the super_col parent to largest order for this group */ - Col [parent].shared2.order = order ; - } - } - - /* === Generate the permutation ========================================= */ - - for (c = 0 ; c < n_col ; c++) - { - p [Col [c].shared2.order] = c ; - } -} - - -/* ========================================================================== */ -/* === detect_super_cols ==================================================== */ -/* ========================================================================== */ - -/* - Detects supercolumns by finding matches between columns in the hash buckets. - Check amongst columns in the set A [row_start ... row_start + row_length-1]. - The columns under consideration are currently *not* in the degree lists, - and have already been placed in the hash buckets. - - The hash bucket for columns whose hash function is equal to h is stored - as follows: - - if head [h] is >= 0, then head [h] contains a degree list, so: - - head [h] is the first column in degree bucket h. - Col [head [h]].headhash gives the first column in hash bucket h. - - otherwise, the degree list is empty, and: - - -(head [h] + 2) is the first column in hash bucket h. - - For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous - column" pointer. Col [c].shared3.hash is used instead as the hash number - for that column. 
The value of Col [c].shared4.hash_next is the next column - in the same hash bucket. - - Assuming no, or "few" hash collisions, the time taken by this routine is - linear in the sum of the sizes (lengths) of each column whose score has - just been computed in the approximate degree computation. - Not user-callable. -*/ -template -static void detect_super_cols -( - /* === Parameters ======================================================= */ - - colamd_col Col [], /* of size n_col+1 */ - Index A [], /* row indices of A */ - Index head [], /* head of degree lists and hash buckets */ - Index row_start, /* pointer to set of columns to check */ - Index row_length /* number of columns to check */ -) -{ - /* === Local variables ================================================== */ - - Index hash ; /* hash value for a column */ - Index *rp ; /* pointer to a row */ - Index c ; /* a column index */ - Index super_c ; /* column index of the column to absorb into */ - Index *cp1 ; /* column pointer for column super_c */ - Index *cp2 ; /* column pointer for column c */ - Index length ; /* length of column super_c */ - Index prev_c ; /* column preceding c in hash bucket */ - Index i ; /* loop counter */ - Index *rp_end ; /* pointer to the end of the row */ - Index col ; /* a column index in the row to check */ - Index head_column ; /* first column in hash bucket or degree list */ - Index first_col ; /* first column in hash bucket */ - - /* === Consider each column in the row ================================== */ - - rp = &A [row_start] ; - rp_end = rp + row_length ; - while (rp < rp_end) - { - col = *rp++ ; - if (COL_IS_DEAD (col)) - { - continue ; - } - - /* get hash number for this column */ - hash = Col [col].shared3.hash ; - COLAMD_ASSERT (hash <= n_col) ; - - /* === Get the first column in this hash bucket ===================== */ - - head_column = head [hash] ; - if (head_column > COLAMD_EMPTY) - { - first_col = Col [head_column].shared3.headhash ; - } - else - { - first_col 
= - (head_column + 2) ; - } - - /* === Consider each column in the hash bucket ====================== */ - - for (super_c = first_col ; super_c != COLAMD_EMPTY ; - super_c = Col [super_c].shared4.hash_next) - { - COLAMD_ASSERT (COL_IS_ALIVE (super_c)) ; - COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ; - length = Col [super_c].length ; - - /* prev_c is the column preceding column c in the hash bucket */ - prev_c = super_c ; - - /* === Compare super_c with all columns after it ================ */ - - for (c = Col [super_c].shared4.hash_next ; - c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next) - { - COLAMD_ASSERT (c != super_c) ; - COLAMD_ASSERT (COL_IS_ALIVE (c)) ; - COLAMD_ASSERT (Col [c].shared3.hash == hash) ; - - /* not identical if lengths or scores are different */ - if (Col [c].length != length || - Col [c].shared2.score != Col [super_c].shared2.score) - { - prev_c = c ; - continue ; - } - - /* compare the two columns */ - cp1 = &A [Col [super_c].start] ; - cp2 = &A [Col [c].start] ; - - for (i = 0 ; i < length ; i++) - { - /* the columns are "clean" (no dead rows) */ - COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ; - COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ; - /* row indices will same order for both supercols, */ - /* no gather scatter nessasary */ - if (*cp1++ != *cp2++) - { - break ; - } - } - - /* the two columns are different if the for-loop "broke" */ - if (i != length) - { - prev_c = c ; - continue ; - } - - /* === Got it! 
two columns are identical =================== */ - - COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; - - Col [super_c].shared1.thickness += Col [c].shared1.thickness ; - Col [c].shared1.parent = super_c ; - KILL_NON_PRINCIPAL_COL (c) ; - /* order c later, in order_children() */ - Col [c].shared2.order = COLAMD_EMPTY ; - /* remove c from hash bucket */ - Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; - } - } - - /* === Empty this hash bucket ======================================= */ - - if (head_column > COLAMD_EMPTY) - { - /* corresponding degree list "hash" is not empty */ - Col [head_column].shared3.headhash = COLAMD_EMPTY ; - } - else - { - /* corresponding degree list "hash" is empty */ - head [hash] = COLAMD_EMPTY ; - } - } -} - - -/* ========================================================================== */ -/* === garbage_collection =================================================== */ -/* ========================================================================== */ - -/* - Defragments and compacts columns and rows in the workspace A. Used when - all avaliable memory has been used while performing row merging. Returns - the index of the first free position in A, after garbage collection. The - time taken by this routine is linear is the size of the array A, which is - itself linear in the number of nonzeros in the input matrix. - Not user-callable. -*/ -template -static Index garbage_collection /* returns the new value of pfree */ - ( - /* === Parameters ======================================================= */ - - Index n_row, /* number of rows */ - Index n_col, /* number of columns */ - Colamd_Row Row [], /* row info */ - colamd_col Col [], /* column info */ - Index A [], /* A [0 ... Alen-1] holds the matrix */ - Index *pfree /* &A [0] ... 
pfree is in use */ - ) -{ - /* === Local variables ================================================== */ - - Index *psrc ; /* source pointer */ - Index *pdest ; /* destination pointer */ - Index j ; /* counter */ - Index r ; /* a row index */ - Index c ; /* a column index */ - Index length ; /* length of a row or column */ - - /* === Defragment the columns =========================================== */ - - pdest = &A[0] ; - for (c = 0 ; c < n_col ; c++) - { - if (COL_IS_ALIVE (c)) - { - psrc = &A [Col [c].start] ; - - /* move and compact the column */ - COLAMD_ASSERT (pdest <= psrc) ; - Col [c].start = (Index) (pdest - &A [0]) ; - length = Col [c].length ; - for (j = 0 ; j < length ; j++) - { - r = *psrc++ ; - if (ROW_IS_ALIVE (r)) - { - *pdest++ = r ; - } - } - Col [c].length = (Index) (pdest - &A [Col [c].start]) ; - } - } - - /* === Prepare to defragment the rows =================================== */ - - for (r = 0 ; r < n_row ; r++) - { - if (ROW_IS_ALIVE (r)) - { - if (Row [r].length == 0) - { - /* this row is of zero length. cannot compact it, so kill it */ - COLAMD_DEBUG3 (("Defrag row kill\n")) ; - KILL_ROW (r) ; - } - else - { - /* save first column index in Row [r].shared2.first_column */ - psrc = &A [Row [r].start] ; - Row [r].shared2.first_column = *psrc ; - COLAMD_ASSERT (ROW_IS_ALIVE (r)) ; - /* flag the start of the row with the one's complement of row */ - *psrc = ONES_COMPLEMENT (r) ; - - } - } - } - - /* === Defragment the rows ============================================== */ - - psrc = pdest ; - while (psrc < pfree) - { - /* find a negative number ... 
the start of a row */ - if (*psrc++ < 0) - { - psrc-- ; - /* get the row index */ - r = ONES_COMPLEMENT (*psrc) ; - COLAMD_ASSERT (r >= 0 && r < n_row) ; - /* restore first column index */ - *psrc = Row [r].shared2.first_column ; - COLAMD_ASSERT (ROW_IS_ALIVE (r)) ; - - /* move and compact the row */ - COLAMD_ASSERT (pdest <= psrc) ; - Row [r].start = (Index) (pdest - &A [0]) ; - length = Row [r].length ; - for (j = 0 ; j < length ; j++) - { - c = *psrc++ ; - if (COL_IS_ALIVE (c)) - { - *pdest++ = c ; - } - } - Row [r].length = (Index) (pdest - &A [Row [r].start]) ; - - } - } - /* ensure we found all the rows */ - COLAMD_ASSERT (debug_rows == 0) ; - - /* === Return the new value of pfree ==================================== */ - - return ((Index) (pdest - &A [0])) ; -} - - -/* ========================================================================== */ -/* === clear_mark =========================================================== */ -/* ========================================================================== */ - -/* - Clears the Row [].shared2.mark array, and returns the new tag_mark. - Return value is the new tag_mark. Not user-callable. -*/ -template -static inline Index clear_mark /* return the new value for tag_mark */ - ( - /* === Parameters ======================================================= */ - - Index n_row, /* number of rows in A */ - Colamd_Row Row [] /* Row [0 ... 
n_row-1].shared2.mark is set to zero */ - ) -{ - /* === Local variables ================================================== */ - - Index r ; - - for (r = 0 ; r < n_row ; r++) - { - if (ROW_IS_ALIVE (r)) - { - Row [r].shared2.mark = 0 ; - } - } - return (1) ; -} - - -} // namespace internal -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Ordering.h b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Ordering.h index f3c31f9cb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Ordering.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/OrderingMethods/Ordering.h @@ -1,154 +0,0 @@ - -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ORDERING_H -#define EIGEN_ORDERING_H - -namespace Eigen { - -#include "Eigen_Colamd.h" - -namespace internal { - -/** \internal - * \ingroup OrderingMethods_Module - * \returns the symmetric pattern A^T+A from the input matrix A. 
- * FIXME: The values should not be considered here - */ -template -void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat) -{ - MatrixType C; - C = mat.transpose(); // NOTE: Could be costly - for (int i = 0; i < C.rows(); i++) - { - for (typename MatrixType::InnerIterator it(C, i); it; ++it) - it.valueRef() = 0.0; - } - symmat = C + mat; -} - -} - -#ifndef EIGEN_MPL2_ONLY - -/** \ingroup OrderingMethods_Module - * \class AMDOrdering - * - * Functor computing the \em approximate \em minimum \em degree ordering - * If the matrix is not structurally symmetric, an ordering of A^T+A is computed - * \tparam Index The type of indices of the matrix - * \sa COLAMDOrdering - */ -template -class AMDOrdering -{ - public: - typedef PermutationMatrix PermutationType; - - /** Compute the permutation vector from a sparse matrix - * This routine is much faster if the input matrix is column-major - */ - template - void operator()(const MatrixType& mat, PermutationType& perm) - { - // Compute the symmetric pattern - SparseMatrix symm; - internal::ordering_helper_at_plus_a(mat,symm); - - // Call the AMD routine - //m_mat.prune(keep_diag()); - internal::minimum_degree_ordering(symm, perm); - } - - /** Compute the permutation with a selfadjoint matrix */ - template - void operator()(const SparseSelfAdjointView& mat, PermutationType& perm) - { - SparseMatrix C; C = mat; - - // Call the AMD routine - // m_mat.prune(keep_diag()); //Remove the diagonal elements - internal::minimum_degree_ordering(C, perm); - } -}; - -#endif // EIGEN_MPL2_ONLY - -/** \ingroup OrderingMethods_Module - * \class NaturalOrdering - * - * Functor computing the natural ordering (identity) - * - * \note Returns an empty permutation matrix - * \tparam Index The type of indices of the matrix - */ -template -class NaturalOrdering -{ - public: - typedef PermutationMatrix PermutationType; - - /** Compute the permutation vector from a column-major sparse matrix */ - template - void operator()(const 
MatrixType& /*mat*/, PermutationType& perm) - { - perm.resize(0); - } - -}; - -/** \ingroup OrderingMethods_Module - * \class COLAMDOrdering - * - * Functor computing the \em column \em approximate \em minimum \em degree ordering - * The matrix should be in column-major and \b compressed format (see SparseMatrix::makeCompressed()). - */ -template -class COLAMDOrdering -{ - public: - typedef PermutationMatrix PermutationType; - typedef Matrix IndexVector; - - /** Compute the permutation vector \a perm form the sparse matrix \a mat - * \warning The input sparse matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). - */ - template - void operator() (const MatrixType& mat, PermutationType& perm) - { - eigen_assert(mat.isCompressed() && "COLAMDOrdering requires a sparse matrix in compressed mode. Call .makeCompressed() before passing it to COLAMDOrdering"); - - Index m = mat.rows(); - Index n = mat.cols(); - Index nnz = mat.nonZeros(); - // Get the recommended value of Alen to be used by colamd - Index Alen = internal::colamd_recommended(nnz, m, n); - // Set the default parameters - double knobs [COLAMD_KNOBS]; - Index stats [COLAMD_STATS]; - internal::colamd_set_defaults(knobs); - - IndexVector p(n+1), A(Alen); - for(Index i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; - for(Index i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; - // Call Colamd routine to compute the ordering - Index info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats); - EIGEN_UNUSED_VARIABLE(info); - eigen_assert( info && "COLAMD failed " ); - - perm.resize(n); - for (Index i = 0; i < n; i++) perm.indices()(p(i)) = i; - } -}; - -} // end namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/CMakeLists.txt index 28c657e9b..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/CMakeLists.txt +++ 
b/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_PastixSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_PastixSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/PaStiXSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/PaStiXSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/PaStiXSupport.h index 20acc0226..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -1,729 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PASTIXSUPPORT_H -#define EIGEN_PASTIXSUPPORT_H - -#if defined(DCOMPLEX) - #define PASTIX_COMPLEX COMPLEX - #define PASTIX_DCOMPLEX DCOMPLEX -#else - #define PASTIX_COMPLEX std::complex - #define PASTIX_DCOMPLEX std::complex -#endif - -namespace Eigen { - -/** \ingroup PaStiXSupport_Module - * \brief Interface to the PaStix solver - * - * This class is used to solve the linear systems A.X = B via the PaStix library. - * The matrix can be either real or complex, symmetric or not. 
- * - * \sa TutorialSparseDirectSolvers - */ -template class PastixLU; -template class PastixLLT; -template class PastixLDLT; - -namespace internal -{ - - template struct pastix_traits; - - template - struct pastix_traits< PastixLU<_MatrixType> > - { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::Index Index; - }; - - template - struct pastix_traits< PastixLLT<_MatrixType,Options> > - { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::Index Index; - }; - - template - struct pastix_traits< PastixLDLT<_MatrixType,Options> > - { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::Index Index; - }; - - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) - { - if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } - if (nbrhs == 0) {x = NULL; nbrhs=1;} - s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); - } - - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) - { - if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } - if (nbrhs == 0) {x = NULL; nbrhs=1;} - d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); - } - - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) - { - if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } - if (nbrhs == 0) {x = NULL; nbrhs=1;} 
- c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast(vals), perm, invp, reinterpret_cast(x), nbrhs, iparm, dparm); - } - - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex *vals, int *perm, int * invp, std::complex *x, int nbrhs, int *iparm, double *dparm) - { - if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } - if (nbrhs == 0) {x = NULL; nbrhs=1;} - z_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast(vals), perm, invp, reinterpret_cast(x), nbrhs, iparm, dparm); - } - - // Convert the matrix to Fortran-style Numbering - template - void c_to_fortran_numbering (MatrixType& mat) - { - if ( !(mat.outerIndexPtr()[0]) ) - { - int i; - for(i = 0; i <= mat.rows(); ++i) - ++mat.outerIndexPtr()[i]; - for(i = 0; i < mat.nonZeros(); ++i) - ++mat.innerIndexPtr()[i]; - } - } - - // Convert to C-style Numbering - template - void fortran_to_c_numbering (MatrixType& mat) - { - // Check the Numbering - if ( mat.outerIndexPtr()[0] == 1 ) - { // Convert to C-style numbering - int i; - for(i = 0; i <= mat.rows(); ++i) - --mat.outerIndexPtr()[i]; - for(i = 0; i < mat.nonZeros(); ++i) - --mat.innerIndexPtr()[i]; - } - } -} - -// This is the base class to interface with PaStiX functions. -// Users should not used this class directly. -template -class PastixBase : internal::noncopyable -{ - public: - typedef typename internal::pastix_traits::MatrixType _MatrixType; - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef Matrix Vector; - typedef SparseMatrix ColSpMatrix; - - public: - - PastixBase() : m_initisOk(false), m_analysisIsOk(false), m_factorizationIsOk(false), m_isInitialized(false), m_pastixdata(0), m_size(0) - { - init(); - } - - ~PastixBase() - { - clean(); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "Pastix solver is not initialized."); - eigen_assert(rows()==b.rows() - && "PastixBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - template - bool _solve (const MatrixBase &b, MatrixBase &x) const; - - Derived& derived() - { - return *static_cast(this); - } - const Derived& derived() const - { - return *static_cast(this); - } - - /** Returns a reference to the integer vector IPARM of PaStiX parameters - * to modify the default parameters. - * The statistics related to the different phases of factorization and solve are saved here as well - * \sa analyzePattern() factorize() - */ - Array& iparm() - { - return m_iparm; - } - - /** Return a reference to a particular index parameter of the IPARM vector - * \sa iparm() - */ - - int& iparm(int idxparam) - { - return m_iparm(idxparam); - } - - /** Returns a reference to the double vector DPARM of PaStiX parameters - * The statistics related to the different phases of factorization and solve are saved here as well - * \sa analyzePattern() factorize() - */ - Array& dparm() - { - return m_dparm; - } - - - /** Return a reference to a particular index parameter of the DPARM vector - * \sa dparm() - */ - double& dparm(int idxparam) - { - return m_dparm(idxparam); - } - - inline Index cols() const { return m_size; } - inline Index rows() const { return m_size; } - - /** \brief Reports whether previous computation was successful. 
- * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the PaStiX reports a problem - * \c InvalidInput if the input matrix is invalid - * - * \sa iparm() - */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "Pastix LU, LLT or LDLT is not initialized."); - eigen_assert(rows()==b.rows() - && "PastixBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - protected: - - // Initialize the Pastix data structure, check the matrix - void init(); - - // Compute the ordering and the symbolic factorization - void analyzePattern(ColSpMatrix& mat); - - // Compute the numerical factorization - void factorize(ColSpMatrix& mat); - - // Free all the data allocated by Pastix - void clean() - { - eigen_assert(m_initisOk && "The Pastix structure should be allocated first"); - m_iparm(IPARM_START_TASK) = API_TASK_CLEAN; - m_iparm(IPARM_END_TASK) = API_TASK_CLEAN; - internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0, - m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data()); - } - - void compute(ColSpMatrix& mat); - - int m_initisOk; - int m_analysisIsOk; - int m_factorizationIsOk; - bool m_isInitialized; - mutable ComputationInfo m_info; - mutable pastix_data_t *m_pastixdata; // Data structure for pastix - mutable int m_comm; // The MPI communicator identifier - mutable Matrix m_iparm; // integer vector for the input parameters - mutable Matrix m_dparm; // Scalar vector for the input parameters - mutable Matrix m_perm; // Permutation vector - mutable Matrix m_invp; // Inverse permutation vector - mutable int 
m_size; // Size of the matrix -}; - - /** Initialize the PaStiX data structure. - *A first call to this function fills iparm and dparm with the default PaStiX parameters - * \sa iparm() dparm() - */ -template -void PastixBase::init() -{ - m_size = 0; - m_iparm.setZero(IPARM_SIZE); - m_dparm.setZero(DPARM_SIZE); - - m_iparm(IPARM_MODIFY_PARAMETER) = API_NO; - pastix(&m_pastixdata, MPI_COMM_WORLD, - 0, 0, 0, 0, - 0, 0, 0, 1, m_iparm.data(), m_dparm.data()); - - m_iparm[IPARM_MATRIX_VERIFICATION] = API_NO; - m_iparm[IPARM_VERBOSE] = 2; - m_iparm[IPARM_ORDERING] = API_ORDER_SCOTCH; - m_iparm[IPARM_INCOMPLETE] = API_NO; - m_iparm[IPARM_OOC_LIMIT] = 2000; - m_iparm[IPARM_RHS_MAKING] = API_RHS_B; - m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO; - - m_iparm(IPARM_START_TASK) = API_TASK_INIT; - m_iparm(IPARM_END_TASK) = API_TASK_INIT; - internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, 0, 0, 0, (Scalar*)0, - 0, 0, 0, 0, m_iparm.data(), m_dparm.data()); - - // Check the returned error - if(m_iparm(IPARM_ERROR_NUMBER)) { - m_info = InvalidInput; - m_initisOk = false; - } - else { - m_info = Success; - m_initisOk = true; - } -} - -template -void PastixBase::compute(ColSpMatrix& mat) -{ - eigen_assert(mat.rows() == mat.cols() && "The input matrix should be squared"); - - analyzePattern(mat); - factorize(mat); - - m_iparm(IPARM_MATRIX_VERIFICATION) = API_NO; - m_isInitialized = m_factorizationIsOk; -} - - -template -void PastixBase::analyzePattern(ColSpMatrix& mat) -{ - eigen_assert(m_initisOk && "The initialization of PaSTiX failed"); - - // clean previous calls - if(m_size>0) - clean(); - - m_size = mat.rows(); - m_perm.resize(m_size); - m_invp.resize(m_size); - - m_iparm(IPARM_START_TASK) = API_TASK_ORDERING; - m_iparm(IPARM_END_TASK) = API_TASK_ANALYSE; - internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(), - mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data()); - - // Check the returned 
error - if(m_iparm(IPARM_ERROR_NUMBER)) - { - m_info = NumericalIssue; - m_analysisIsOk = false; - } - else - { - m_info = Success; - m_analysisIsOk = true; - } -} - -template -void PastixBase::factorize(ColSpMatrix& mat) -{ -// if(&m_cpyMat != &mat) m_cpyMat = mat; - eigen_assert(m_analysisIsOk && "The analysis phase should be called before the factorization phase"); - m_iparm(IPARM_START_TASK) = API_TASK_NUMFACT; - m_iparm(IPARM_END_TASK) = API_TASK_NUMFACT; - m_size = mat.rows(); - - internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, m_size, mat.outerIndexPtr(), mat.innerIndexPtr(), - mat.valuePtr(), m_perm.data(), m_invp.data(), 0, 0, m_iparm.data(), m_dparm.data()); - - // Check the returned error - if(m_iparm(IPARM_ERROR_NUMBER)) - { - m_info = NumericalIssue; - m_factorizationIsOk = false; - m_isInitialized = false; - } - else - { - m_info = Success; - m_factorizationIsOk = true; - m_isInitialized = true; - } -} - -/* Solve the system */ -template -template -bool PastixBase::_solve (const MatrixBase &b, MatrixBase &x) const -{ - eigen_assert(m_isInitialized && "The matrix should be factorized first"); - EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - int rhs = 1; - - x = b; /* on return, x is overwritten by the computed solution */ - - for (int i = 0; i < b.cols(); i++){ - m_iparm[IPARM_START_TASK] = API_TASK_SOLVE; - m_iparm[IPARM_END_TASK] = API_TASK_REFINE; - - internal::eigen_pastix(&m_pastixdata, MPI_COMM_WORLD, x.rows(), 0, 0, 0, - m_perm.data(), m_invp.data(), &x(0, i), rhs, m_iparm.data(), m_dparm.data()); - } - - // Check the returned error - m_info = m_iparm(IPARM_ERROR_NUMBER)==0 ? Success : NumericalIssue; - - return m_iparm(IPARM_ERROR_NUMBER)==0; -} - -/** \ingroup PaStiXSupport_Module - * \class PastixLU - * \brief Sparse direct LU solver based on PaStiX library - * - * This class is used to solve the linear systems A.X = B with a supernodal LU - * factorization in the PaStiX library. 
The matrix A should be squared and nonsingular - * PaStiX requires that the matrix A has a symmetric structural pattern. - * This interface can symmetrize the input matrix otherwise. - * The vectors or matrices X and B can be either dense or sparse. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam IsStrSym Indicates if the input matrix has a symmetric pattern, default is false - * NOTE : Note that if the analysis and factorization phase are called separately, - * the input matrix will be symmetrized at each call, hence it is advised to - * symmetrize the matrix in a end-user program and set \p IsStrSym to true - * - * \sa \ref TutorialSparseDirectSolvers - * - */ -template -class PastixLU : public PastixBase< PastixLU<_MatrixType> > -{ - public: - typedef _MatrixType MatrixType; - typedef PastixBase > Base; - typedef typename Base::ColSpMatrix ColSpMatrix; - typedef typename MatrixType::Index Index; - - public: - PastixLU() : Base() - { - init(); - } - - PastixLU(const MatrixType& matrix):Base() - { - init(); - compute(matrix); - } - /** Compute the LU supernodal factorization of \p matrix. - * iparm and dparm can be used to tune the PaStiX parameters. - * see the PaStiX user's manual - * \sa analyzePattern() factorize() - */ - void compute (const MatrixType& matrix) - { - m_structureIsUptodate = false; - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::compute(temp); - } - /** Compute the LU symbolic factorization of \p matrix using its sparsity pattern. - * Several ordering methods can be used at this step. See the PaStiX user's manual. 
- * The result of this operation can be used with successive matrices having the same pattern as \p matrix - * \sa factorize() - */ - void analyzePattern(const MatrixType& matrix) - { - m_structureIsUptodate = false; - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::analyzePattern(temp); - } - - /** Compute the LU supernodal factorization of \p matrix - * WARNING The matrix \p matrix should have the same structural pattern - * as the same used in the analysis phase. - * \sa analyzePattern() - */ - void factorize(const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::factorize(temp); - } - protected: - - void init() - { - m_structureIsUptodate = false; - m_iparm(IPARM_SYM) = API_SYM_NO; - m_iparm(IPARM_FACTORIZATION) = API_FACT_LU; - } - - void grabMatrix(const MatrixType& matrix, ColSpMatrix& out) - { - if(IsStrSym) - out = matrix; - else - { - if(!m_structureIsUptodate) - { - // update the transposed structure - m_transposedStructure = matrix.transpose(); - - // Set the elements of the matrix to zero - for (Index j=0; j - * \tparam UpLo The part of the matrix to use : Lower or Upper. 
The default is Lower as required by PaStiX - * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class PastixLLT : public PastixBase< PastixLLT<_MatrixType, _UpLo> > -{ - public: - typedef _MatrixType MatrixType; - typedef PastixBase > Base; - typedef typename Base::ColSpMatrix ColSpMatrix; - - public: - enum { UpLo = _UpLo }; - PastixLLT() : Base() - { - init(); - } - - PastixLLT(const MatrixType& matrix):Base() - { - init(); - compute(matrix); - } - - /** Compute the L factor of the LL^T supernodal factorization of \p matrix - * \sa analyzePattern() factorize() - */ - void compute (const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::compute(temp); - } - - /** Compute the LL^T symbolic factorization of \p matrix using its sparsity pattern - * The result of this operation can be used with successive matrices having the same pattern as \p matrix - * \sa factorize() - */ - void analyzePattern(const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::analyzePattern(temp); - } - /** Compute the LL^T supernodal numerical factorization of \p matrix - * \sa analyzePattern() - */ - void factorize(const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::factorize(temp); - } - protected: - using Base::m_iparm; - - void init() - { - m_iparm(IPARM_SYM) = API_SYM_YES; - m_iparm(IPARM_FACTORIZATION) = API_FACT_LLT; - } - - void grabMatrix(const MatrixType& matrix, ColSpMatrix& out) - { - // Pastix supports only lower, column-major matrices - out.template selfadjointView() = matrix.template selfadjointView(); - internal::c_to_fortran_numbering(out); - } -}; - -/** \ingroup PaStiXSupport_Module - * \class PastixLDLT - * \brief A sparse direct supernodal Cholesky (LLT) factorization and solver based on the PaStiX library - * - * This class is used to solve the linear systems A.X = B via a LDL^T supernodal Cholesky factorization - * available in the PaStiX library. 
The matrix A should be symmetric and positive definite - * WARNING Selfadjoint complex matrices are not supported in the current version of PaStiX - * The vectors or matrices X and B can be either dense or sparse - * - * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam UpLo The part of the matrix to use : Lower or Upper. The default is Lower as required by PaStiX - * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class PastixLDLT : public PastixBase< PastixLDLT<_MatrixType, _UpLo> > -{ - public: - typedef _MatrixType MatrixType; - typedef PastixBase > Base; - typedef typename Base::ColSpMatrix ColSpMatrix; - - public: - enum { UpLo = _UpLo }; - PastixLDLT():Base() - { - init(); - } - - PastixLDLT(const MatrixType& matrix):Base() - { - init(); - compute(matrix); - } - - /** Compute the L and D factors of the LDL^T factorization of \p matrix - * \sa analyzePattern() factorize() - */ - void compute (const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::compute(temp); - } - - /** Compute the LDL^T symbolic factorization of \p matrix using its sparsity pattern - * The result of this operation can be used with successive matrices having the same pattern as \p matrix - * \sa factorize() - */ - void analyzePattern(const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::analyzePattern(temp); - } - /** Compute the LDL^T supernodal numerical factorization of \p matrix - * - */ - void factorize(const MatrixType& matrix) - { - ColSpMatrix temp; - grabMatrix(matrix, temp); - Base::factorize(temp); - } - - protected: - using Base::m_iparm; - - void init() - { - m_iparm(IPARM_SYM) = API_SYM_YES; - m_iparm(IPARM_FACTORIZATION) = API_FACT_LDLT; - } - - void grabMatrix(const MatrixType& matrix, ColSpMatrix& out) - { - // Pastix supports only lower, column-major matrices - out.template selfadjointView() = matrix.template selfadjointView(); - 
internal::c_to_fortran_numbering(out); - } -}; - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef PastixBase<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef PastixBase<_MatrixType> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/CMakeLists.txt index a097ab401..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_PardisoSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_PardisoSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/PardisoSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/PardisoSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/PardisoSupport.h index 0faacc5f5..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/PardisoSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/PardisoSupport/PardisoSupport.h @@ -1,603 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL PARDISO - ******************************************************************************** -*/ - -#ifndef EIGEN_PARDISOSUPPORT_H -#define EIGEN_PARDISOSUPPORT_H - -namespace Eigen { - -template class PardisoLU; -template class PardisoLLT; -template class PardisoLDLT; - -namespace internal -{ - template - struct pardiso_run_selector - { - static Index run( _MKL_DSS_HANDLE_t pt, Index maxfct, Index mnum, Index type, Index phase, Index n, void *a, - Index *ia, Index *ja, Index *perm, Index nrhs, Index *iparm, Index msglvl, void *b, void *x) - { - Index error = 0; - ::pardiso(pt, &maxfct, &mnum, &type, &phase, &n, a, ia, ja, perm, &nrhs, iparm, &msglvl, b, x, &error); - return error; - } - }; - template<> - struct pardiso_run_selector - { - typedef long long int Index; - static Index run( _MKL_DSS_HANDLE_t pt, Index maxfct, Index mnum, Index type, Index phase, Index n, void *a, - Index *ia, Index *ja, Index *perm, Index nrhs, Index *iparm, Index msglvl, void *b, void *x) - { - Index error = 0; - ::pardiso_64(pt, &maxfct, &mnum, &type, &phase, &n, a, ia, ja, perm, &nrhs, iparm, &msglvl, b, x, &error); - return error; - } - }; - - template struct pardiso_traits; - - template - struct pardiso_traits< PardisoLU<_MatrixType> > - { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::Index Index; - }; - - template - struct pardiso_traits< PardisoLLT<_MatrixType, Options> > - { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::Index Index; - }; - - template - struct pardiso_traits< PardisoLDLT<_MatrixType, Options> > - { - typedef _MatrixType MatrixType; - typedef typename _MatrixType::Scalar Scalar; - typedef typename 
_MatrixType::RealScalar RealScalar; - typedef typename _MatrixType::Index Index; - }; - -} - -template -class PardisoImpl -{ - typedef internal::pardiso_traits Traits; - public: - typedef typename Traits::MatrixType MatrixType; - typedef typename Traits::Scalar Scalar; - typedef typename Traits::RealScalar RealScalar; - typedef typename Traits::Index Index; - typedef SparseMatrix SparseMatrixType; - typedef Matrix VectorType; - typedef Matrix IntRowVectorType; - typedef Matrix IntColVectorType; - typedef Array ParameterType; - enum { - ScalarIsComplex = NumTraits::IsComplex - }; - - PardisoImpl() - { - eigen_assert((sizeof(Index) >= sizeof(_INTEGER_t) && sizeof(Index) <= 8) && "Non-supported index type"); - m_iparm.setZero(); - m_msglvl = 0; // No output - m_initialized = false; - } - - ~PardisoImpl() - { - pardisoRelease(); - } - - inline Index cols() const { return m_size; } - inline Index rows() const { return m_size; } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix appears to be negative. - */ - ComputationInfo info() const - { - eigen_assert(m_initialized && "Decomposition is not initialized."); - return m_info; - } - - /** \warning for advanced usage only. - * \returns a reference to the parameter array controlling PARDISO. - * See the PARDISO manual to know how to use it. */ - ParameterType& pardisoParameterArray() - { - return m_iparm; - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. - * - * \sa factorize() - */ - Derived& analyzePattern(const MatrixType& matrix); - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. 
- * - * \sa analyzePattern() - */ - Derived& factorize(const MatrixType& matrix); - - Derived& compute(const MatrixType& matrix); - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_initialized && "Pardiso solver is not initialized."); - eigen_assert(rows()==b.rows() - && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_initialized && "Pardiso solver is not initialized."); - eigen_assert(rows()==b.rows() - && "PardisoImpl::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - Derived& derived() - { - return *static_cast(this); - } - const Derived& derived() const - { - return *static_cast(this); - } - - template - bool _solve(const MatrixBase &b, MatrixBase& x) const; - - protected: - void pardisoRelease() - { - if(m_initialized) // Factorization ran at least once - { - internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, -1, m_size, 0, 0, 0, m_perm.data(), 0, - m_iparm.data(), m_msglvl, 0, 0); - } - } - - void pardisoInit(int type) - { - m_type = type; - bool symmetric = std::abs(m_type) < 10; - m_iparm[0] = 1; // No solver default - m_iparm[1] = 2; // use Metis for the ordering - m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??) 
- m_iparm[3] = 0; // No iterative-direct algorithm - m_iparm[4] = 0; // No user fill-in reducing permutation - m_iparm[5] = 0; // Write solution into x, b is left unchanged - m_iparm[6] = 0; // Not in use - m_iparm[7] = 2; // Max numbers of iterative refinement steps - m_iparm[8] = 0; // Not in use - m_iparm[9] = 13; // Perturb the pivot elements with 1E-13 - m_iparm[10] = symmetric ? 0 : 1; // Use nonsymmetric permutation and scaling MPS - m_iparm[11] = 0; // Not in use - m_iparm[12] = symmetric ? 0 : 1; // Maximum weighted matching algorithm is switched-off (default for symmetric). - // Try m_iparm[12] = 1 in case of inappropriate accuracy - m_iparm[13] = 0; // Output: Number of perturbed pivots - m_iparm[14] = 0; // Not in use - m_iparm[15] = 0; // Not in use - m_iparm[16] = 0; // Not in use - m_iparm[17] = -1; // Output: Number of nonzeros in the factor LU - m_iparm[18] = -1; // Output: Mflops for LU factorization - m_iparm[19] = 0; // Output: Numbers of CG Iterations - - m_iparm[20] = 0; // 1x1 pivoting - m_iparm[26] = 0; // No matrix checker - m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0; - m_iparm[34] = 1; // C indexing - m_iparm[36] = 0; // CSR - m_iparm[59] = 0; // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core - - memset(m_pt, 0, sizeof(m_pt)); - } - - protected: - // cached data to reduce reallocation, etc. 
- - void manageErrorCode(Index error) - { - switch(error) - { - case 0: - m_info = Success; - break; - case -4: - case -7: - m_info = NumericalIssue; - break; - default: - m_info = InvalidInput; - } - } - - mutable SparseMatrixType m_matrix; - ComputationInfo m_info; - bool m_initialized, m_analysisIsOk, m_factorizationIsOk; - Index m_type, m_msglvl; - mutable void *m_pt[64]; - mutable ParameterType m_iparm; - mutable IntColVectorType m_perm; - Index m_size; - - private: - PardisoImpl(PardisoImpl &) {} -}; - -template -Derived& PardisoImpl::compute(const MatrixType& a) -{ - m_size = a.rows(); - eigen_assert(a.rows() == a.cols()); - - pardisoRelease(); - memset(m_pt, 0, sizeof(m_pt)); - m_perm.setZero(m_size); - derived().getMatrix(a); - - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 12, m_size, - m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - - manageErrorCode(error); - m_analysisIsOk = true; - m_factorizationIsOk = true; - m_initialized = true; - return derived(); -} - -template -Derived& PardisoImpl::analyzePattern(const MatrixType& a) -{ - m_size = a.rows(); - eigen_assert(m_size == a.cols()); - - pardisoRelease(); - memset(m_pt, 0, sizeof(m_pt)); - m_perm.setZero(m_size); - derived().getMatrix(a); - - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 11, m_size, - m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - - manageErrorCode(error); - m_analysisIsOk = true; - m_factorizationIsOk = false; - m_initialized = true; - return derived(); -} - -template -Derived& PardisoImpl::factorize(const MatrixType& a) -{ - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - eigen_assert(m_size == a.rows() && m_size == a.cols()); - - derived().getMatrix(a); - - Index error; - error = internal::pardiso_run_selector::run(m_pt, 
1, 1, m_type, 22, m_size, - m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL); - - manageErrorCode(error); - m_factorizationIsOk = true; - return derived(); -} - -template -template -bool PardisoImpl::_solve(const MatrixBase &b, MatrixBase& x) const -{ - if(m_iparm[0] == 0) // Factorization was not computed - return false; - - //Index n = m_matrix.rows(); - Index nrhs = Index(b.cols()); - eigen_assert(m_size==b.rows()); - eigen_assert(((MatrixBase::Flags & RowMajorBit) == 0 || nrhs == 1) && "Row-major right hand sides are not supported"); - eigen_assert(((MatrixBase::Flags & RowMajorBit) == 0 || nrhs == 1) && "Row-major matrices of unknowns are not supported"); - eigen_assert(((nrhs == 1) || b.outerStride() == b.rows())); - - -// switch (transposed) { -// case SvNoTrans : m_iparm[11] = 0 ; break; -// case SvTranspose : m_iparm[11] = 2 ; break; -// case SvAdjoint : m_iparm[11] = 1 ; break; -// default: -// //std::cerr << "Eigen: transposition option \"" << transposed << "\" not supported by the PARDISO backend\n"; -// m_iparm[11] = 0; -// } - - Scalar* rhs_ptr = const_cast(b.derived().data()); - Matrix tmp; - - // Pardiso cannot solve in-place - if(rhs_ptr == x.derived().data()) - { - tmp = b; - rhs_ptr = tmp.data(); - } - - Index error; - error = internal::pardiso_run_selector::run(m_pt, 1, 1, m_type, 33, m_size, - m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(), - m_perm.data(), nrhs, m_iparm.data(), m_msglvl, - rhs_ptr, x.derived().data()); - return error==0; -} - - -/** \ingroup PardisoSupport_Module - * \class PardisoLU - * \brief A sparse direct LU factorization and solver based on the PARDISO library - * - * This class allows to solve for A.X = B sparse linear problems via a direct LU factorization - * using the Intel MKL PARDISO library. The sparse matrix A must be squared and invertible. - * The vectors or matrices X and B can be either dense or sparse. 
- * - * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: - * \code solver.pardisoParameterArray()[59] = 1; \endcode - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class PardisoLU : public PardisoImpl< PardisoLU > -{ - protected: - typedef PardisoImpl< PardisoLU > Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::RealScalar RealScalar; - using Base::pardisoInit; - using Base::m_matrix; - friend class PardisoImpl< PardisoLU >; - - public: - - using Base::compute; - using Base::solve; - - PardisoLU() - : Base() - { - pardisoInit(Base::ScalarIsComplex ? 13 : 11); - } - - PardisoLU(const MatrixType& matrix) - : Base() - { - pardisoInit(Base::ScalarIsComplex ? 13 : 11); - compute(matrix); - } - protected: - void getMatrix(const MatrixType& matrix) - { - m_matrix = matrix; - } - - private: - PardisoLU(PardisoLU& ) {} -}; - -/** \ingroup PardisoSupport_Module - * \class PardisoLLT - * \brief A sparse direct Cholesky (LLT) factorization and solver based on the PARDISO library - * - * This class allows to solve for A.X = B sparse linear problems via a LL^T Cholesky factorization - * using the Intel MKL PARDISO library. The sparse matrix A must be selfajoint and positive definite. - * The vectors or matrices X and B can be either dense or sparse. - * - * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: - * \code solver.pardisoParameterArray()[59] = 1; \endcode - * - * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used. - * Upper|Lower can be used to tell both triangular parts can be used as input. 
- * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class PardisoLLT : public PardisoImpl< PardisoLLT > -{ - protected: - typedef PardisoImpl< PardisoLLT > Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::Index Index; - typedef typename Base::RealScalar RealScalar; - using Base::pardisoInit; - using Base::m_matrix; - friend class PardisoImpl< PardisoLLT >; - - public: - - enum { UpLo = _UpLo }; - using Base::compute; - using Base::solve; - - PardisoLLT() - : Base() - { - pardisoInit(Base::ScalarIsComplex ? 4 : 2); - } - - PardisoLLT(const MatrixType& matrix) - : Base() - { - pardisoInit(Base::ScalarIsComplex ? 4 : 2); - compute(matrix); - } - - protected: - - void getMatrix(const MatrixType& matrix) - { - // PARDISO supports only upper, row-major matrices - PermutationMatrix p_null; - m_matrix.resize(matrix.rows(), matrix.cols()); - m_matrix.template selfadjointView() = matrix.template selfadjointView().twistedBy(p_null); - } - - private: - PardisoLLT(PardisoLLT& ) {} -}; - -/** \ingroup PardisoSupport_Module - * \class PardisoLDLT - * \brief A sparse direct Cholesky (LDLT) factorization and solver based on the PARDISO library - * - * This class allows to solve for A.X = B sparse linear problems via a LDL^T Cholesky factorization - * using the Intel MKL PARDISO library. The sparse matrix A is assumed to be selfajoint and positive definite. - * For complex matrices, A can also be symmetric only, see the \a Options template parameter. - * The vectors or matrices X and B can be either dense or sparse. - * - * By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set: - * \code solver.pardisoParameterArray()[59] = 1; \endcode - * - * \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used. 
- * Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix. - * Upper|Lower can be used to tell both triangular parts can be used as input. - * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class PardisoLDLT : public PardisoImpl< PardisoLDLT > -{ - protected: - typedef PardisoImpl< PardisoLDLT > Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::Index Index; - typedef typename Base::RealScalar RealScalar; - using Base::pardisoInit; - using Base::m_matrix; - friend class PardisoImpl< PardisoLDLT >; - - public: - - using Base::compute; - using Base::solve; - enum { UpLo = Options&(Upper|Lower) }; - - PardisoLDLT() - : Base() - { - pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2); - } - - PardisoLDLT(const MatrixType& matrix) - : Base() - { - pardisoInit(Base::ScalarIsComplex ? ( bool(Options&Symmetric) ? 6 : -4 ) : -2); - compute(matrix); - } - - void getMatrix(const MatrixType& matrix) - { - // PARDISO supports only upper, row-major matrices - PermutationMatrix p_null; - m_matrix.resize(matrix.rows(), matrix.cols()); - m_matrix.template selfadjointView() = matrix.template selfadjointView().twistedBy(p_null); - } - - private: - PardisoLDLT(PardisoLDLT& ) {} -}; - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef PardisoImpl<_Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef PardisoImpl Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_PARDISOSUPPORT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/QR/CMakeLists.txt 
b/thirdparty/eigen-3.2.10/Eigen/src/QR/CMakeLists.txt index 96f43d7f5..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/QR/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/QR/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_QR_SRCS "*.h") - -INSTALL(FILES - ${Eigen_QR_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/QR COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/QR/ColPivHouseholderQR_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/QR/ColPivHouseholderQR_MKL.h index 7b6ba0a5e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/QR/ColPivHouseholderQR_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/QR/ColPivHouseholderQR_MKL.h @@ -1,98 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Householder QR decomposition of a matrix with column pivoting based on - * LAPACKE_?geqp3 function. - ******************************************************************************** -*/ - -#ifndef EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H -#define EIGEN_COLPIVOTINGHOUSEHOLDERQR_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_QR_COLPIV(EIGTYPE, MKLTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -ColPivHouseholderQR >& \ -ColPivHouseholderQR >::compute( \ - const Matrix& matrix) \ -\ -{ \ - using std::abs; \ - typedef Matrix MatrixType; \ - typedef MatrixType::RealScalar RealScalar; \ - Index rows = matrix.rows();\ - Index cols = matrix.cols();\ - Index size = matrix.diagonalSize();\ -\ - m_qr = matrix;\ - m_hCoeffs.resize(size);\ -\ - m_colsTranspositions.resize(cols);\ - /*Index number_of_transpositions = 0;*/ \ -\ - m_nonzero_pivots = 0; \ - m_maxpivot = RealScalar(0);\ - m_colsPermutation.resize(cols); \ - m_colsPermutation.indices().setZero(); \ -\ - lapack_int lda = m_qr.outerStride(), i; \ - lapack_int matrix_order = MKLCOLROW; \ - LAPACKE_##MKLPREFIX##geqp3( matrix_order, rows, cols, (MKLTYPE*)m_qr.data(), lda, (lapack_int*)m_colsPermutation.indices().data(), 
(MKLTYPE*)m_hCoeffs.data()); \ - m_isInitialized = true; \ - m_maxpivot=m_qr.diagonal().cwiseAbs().maxCoeff(); \ - m_hCoeffs.adjointInPlace(); \ - RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold(); \ - lapack_int *perm = m_colsPermutation.indices().data(); \ - for(i=0;i premultiplied_threshold);\ - } \ - for(i=0;i \ -struct householder_qr_inplace_blocked \ -{ \ - static void run(MatrixQR& mat, HCoeffs& hCoeffs, \ - typename MatrixQR::Index = 32, \ - typename MatrixQR::Scalar* = 0) \ - { \ - lapack_int m = (lapack_int) mat.rows(); \ - lapack_int n = (lapack_int) mat.cols(); \ - lapack_int lda = (lapack_int) mat.outerStride(); \ - lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \ - hCoeffs.adjointInPlace(); \ - } \ -}; - -EIGEN_MKL_QR_NOPIV(double, double, d) -EIGEN_MKL_QR_NOPIV(float, float, s) -EIGEN_MKL_QR_NOPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_QR_NOPIV(scomplex, MKL_Complex8, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_QR_MKL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/CMakeLists.txt index 4968beaf2..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SPQRSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SPQRSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SPQRSupport/ COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h index 36138101d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h @@ -1,338 +0,0 @@ -// 
This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Desire Nuentsa -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SUITESPARSEQRSUPPORT_H -#define EIGEN_SUITESPARSEQRSUPPORT_H - -namespace Eigen { - - template class SPQR; - template struct SPQRMatrixQReturnType; - template struct SPQRMatrixQTransposeReturnType; - template struct SPQR_QProduct; - namespace internal { - template struct traits > - { - typedef typename SPQRType::MatrixType ReturnType; - }; - template struct traits > - { - typedef typename SPQRType::MatrixType ReturnType; - }; - template struct traits > - { - typedef typename Derived::PlainObject ReturnType; - }; - } // End namespace internal - -/** - * \ingroup SPQRSupport_Module - * \class SPQR - * \brief Sparse QR factorization based on SuiteSparseQR library - * - * This class is used to perform a multithreaded and multifrontal rank-revealing QR decomposition - * of sparse matrices. The result is then used to solve linear leasts_square systems. - * Clearly, a QR factorization is returned such that A*P = Q*R where : - * - * P is the column permutation. Use colsPermutation() to get it. - * - * Q is the orthogonal matrix represented as Householder reflectors. - * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. - * You can then apply it to a vector. - * - * R is the sparse triangular factor. Use matrixQR() to get it as SparseMatrix. - * NOTE : The Index type of R is always SuiteSparse_long. 
You can get it with SPQR::Index - * - * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> - * NOTE - * - */ -template -class SPQR -{ - public: - typedef typename _MatrixType::Scalar Scalar; - typedef typename _MatrixType::RealScalar RealScalar; - typedef SuiteSparse_long Index ; - typedef SparseMatrix MatrixType; - typedef PermutationMatrix PermutationType; - public: - SPQR() - : m_isInitialized(false), m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits::epsilon()), m_useDefaultThreshold(true) - { - cholmod_l_start(&m_cc); - } - - SPQR(const _MatrixType& matrix) - : m_isInitialized(false), m_ordering(SPQR_ORDERING_DEFAULT), m_allow_tol(SPQR_DEFAULT_TOL), m_tolerance (NumTraits::epsilon()), m_useDefaultThreshold(true) - { - cholmod_l_start(&m_cc); - compute(matrix); - } - - ~SPQR() - { - SPQR_free(); - cholmod_l_finish(&m_cc); - } - void SPQR_free() - { - cholmod_l_free_sparse(&m_H, &m_cc); - cholmod_l_free_sparse(&m_cR, &m_cc); - cholmod_l_free_dense(&m_HTau, &m_cc); - std::free(m_E); - std::free(m_HPinv); - } - - void compute(const _MatrixType& matrix) - { - if(m_isInitialized) SPQR_free(); - - MatrixType mat(matrix); - - /* Compute the default threshold as in MatLab, see: - * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing - * Sparse QR Factorization, ACM Trans. on Math. Soft. 
38(1), 2011, Page 8:3 - */ - RealScalar pivotThreshold = m_tolerance; - if(m_useDefaultThreshold) - { - using std::max; - RealScalar max2Norm = 0.0; - for (int j = 0; j < mat.cols(); j++) max2Norm = (max)(max2Norm, mat.col(j).norm()); - if(max2Norm==RealScalar(0)) - max2Norm = RealScalar(1); - pivotThreshold = 20 * (mat.rows() + mat.cols()) * max2Norm * NumTraits::epsilon(); - } - - cholmod_sparse A; - A = viewAsCholmod(mat); - Index col = matrix.cols(); - m_rank = SuiteSparseQR(m_ordering, pivotThreshold, col, &A, - &m_cR, &m_E, &m_H, &m_HPinv, &m_HTau, &m_cc); - - if (!m_cR) - { - m_info = NumericalIssue; - m_isInitialized = false; - return; - } - m_info = Success; - m_isInitialized = true; - m_isRUpToDate = false; - } - /** - * Get the number of rows of the input matrix and the Q matrix - */ - inline Index rows() const {return m_cR->nrow; } - - /** - * Get the number of columns of the input matrix. - */ - inline Index cols() const { return m_cR->ncol; } - - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template - inline const internal::solve_retval solve(const MatrixBase& B) const - { - eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()"); - eigen_assert(this->rows()==B.rows() - && "SPQR::solve(): invalid number of rows of the right hand side matrix B"); - return internal::solve_retval(*this, B.derived()); - } - - template - void _solve(const MatrixBase &b, MatrixBase &dest) const - { - eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()"); - eigen_assert(b.cols()==1 && "This method is for vectors only"); - - //Compute Q^T * b - typename Dest::PlainObject y, y2; - y = matrixQ().transpose() * b; - - // Solves with the triangular matrix R - Index rk = this->rank(); - y2 = y; - y.resize((std::max)(cols(),Index(y.rows())),y.cols()); - y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView().solve(y2.topRows(rk)); - - // Apply the column permutation - // colsPermutation() performs a copy of the permutation, - // so let's apply it manually: - for(Index i = 0; i < rk; ++i) dest.row(m_E[i]) = y.row(i); - for(Index i = rk; i < cols(); ++i) dest.row(m_E[i]).setZero(); - -// y.bottomRows(y.rows()-rk).setZero(); -// dest = colsPermutation() * y.topRows(cols()); - - m_info = Success; - } - - /** \returns the sparse triangular factor R. 
It is a sparse matrix - */ - const MatrixType matrixR() const - { - eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()"); - if(!m_isRUpToDate) { - m_R = viewAsEigen(*m_cR); - m_isRUpToDate = true; - } - return m_R; - } - /// Get an expression of the matrix Q - SPQRMatrixQReturnType matrixQ() const - { - return SPQRMatrixQReturnType(*this); - } - /// Get the permutation that was applied to columns of A - PermutationType colsPermutation() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - Index n = m_cR->ncol; - PermutationType colsPerm(n); - for(Index j = 0; j friend struct SPQR_QProduct; -}; - -template -struct SPQR_QProduct : ReturnByValue > -{ - typedef typename SPQRType::Scalar Scalar; - typedef typename SPQRType::Index Index; - //Define the constructor to get reference to argument types - SPQR_QProduct(const SPQRType& spqr, const Derived& other, bool transpose) : m_spqr(spqr),m_other(other),m_transpose(transpose) {} - - inline Index rows() const { return m_transpose ? m_spqr.rows() : m_spqr.cols(); } - inline Index cols() const { return m_other.cols(); } - // Assign to a vector - template - void evalTo(ResType& res) const - { - cholmod_dense y_cd; - cholmod_dense *x_cd; - int method = m_transpose ? 
SPQR_QTX : SPQR_QX; - cholmod_common *cc = m_spqr.cholmodCommon(); - y_cd = viewAsCholmod(m_other.const_cast_derived()); - x_cd = SuiteSparseQR_qmult(method, m_spqr.m_H, m_spqr.m_HTau, m_spqr.m_HPinv, &y_cd, cc); - res = Matrix::Map(reinterpret_cast(x_cd->x), x_cd->nrow, x_cd->ncol); - cholmod_l_free_dense(&x_cd, cc); - } - const SPQRType& m_spqr; - const Derived& m_other; - bool m_transpose; - -}; -template -struct SPQRMatrixQReturnType{ - - SPQRMatrixQReturnType(const SPQRType& spqr) : m_spqr(spqr) {} - template - SPQR_QProduct operator*(const MatrixBase& other) - { - return SPQR_QProduct(m_spqr,other.derived(),false); - } - SPQRMatrixQTransposeReturnType adjoint() const - { - return SPQRMatrixQTransposeReturnType(m_spqr); - } - // To use for operations with the transpose of Q - SPQRMatrixQTransposeReturnType transpose() const - { - return SPQRMatrixQTransposeReturnType(m_spqr); - } - const SPQRType& m_spqr; -}; - -template -struct SPQRMatrixQTransposeReturnType{ - SPQRMatrixQTransposeReturnType(const SPQRType& spqr) : m_spqr(spqr) {} - template - SPQR_QProduct operator*(const MatrixBase& other) - { - return SPQR_QProduct(m_spqr,other.derived(), true); - } - const SPQRType& m_spqr; -}; - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef SPQR<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - -}// End namespace Eigen -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SVD/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/SVD/CMakeLists.txt index 55efc44b1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SVD/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/SVD/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SVD_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SVD_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SVD COMPONENT Devel - ) diff --git 
a/thirdparty/eigen-3.2.10/Eigen/src/SVD/JacobiSVD_MKL.h b/thirdparty/eigen-3.2.10/Eigen/src/SVD/JacobiSVD_MKL.h index 14e461c4e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SVD/JacobiSVD_MKL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SVD/JacobiSVD_MKL.h @@ -1,92 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * Singular Value Decomposition - SVD. 
- ******************************************************************************** -*/ - -#ifndef EIGEN_JACOBISVD_MKL_H -#define EIGEN_JACOBISVD_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_SVD(EIGTYPE, MKLTYPE, MKLRTYPE, MKLPREFIX, EIGCOLROW, MKLCOLROW) \ -template<> inline \ -JacobiSVD, ColPivHouseholderQRPreconditioner>& \ -JacobiSVD, ColPivHouseholderQRPreconditioner>::compute(const Matrix& matrix, unsigned int computationOptions) \ -{ \ - typedef Matrix MatrixType; \ - /*typedef MatrixType::Scalar Scalar;*/ \ - /*typedef MatrixType::RealScalar RealScalar;*/ \ - allocate(matrix.rows(), matrix.cols(), computationOptions); \ -\ - /*const RealScalar precision = RealScalar(2) * NumTraits::epsilon();*/ \ - m_nonzeroSingularValues = m_diagSize; \ -\ - lapack_int lda = matrix.outerStride(), ldu, ldvt; \ - lapack_int matrix_order = MKLCOLROW; \ - char jobu, jobvt; \ - MKLTYPE *u, *vt, dummy; \ - jobu = (m_computeFullU) ? 'A' : (m_computeThinU) ? 'S' : 'N'; \ - jobvt = (m_computeFullV) ? 'A' : (m_computeThinV) ? 'S' : 'N'; \ - if (computeU()) { \ - ldu = m_matrixU.outerStride(); \ - u = (MKLTYPE*)m_matrixU.data(); \ - } else { ldu=1; u=&dummy; }\ - MatrixType localV; \ - ldvt = (m_computeFullV) ? m_cols : (m_computeThinV) ? m_diagSize : 1; \ - if (computeV()) { \ - localV.resize(ldvt, m_cols); \ - vt = (MKLTYPE*)localV.data(); \ - } else { ldvt=1; vt=&dummy; }\ - Matrix superb; superb.resize(m_diagSize, 1); \ - MatrixType m_temp; m_temp = matrix; \ - LAPACKE_##MKLPREFIX##gesvd( matrix_order, jobu, jobvt, m_rows, m_cols, (MKLTYPE*)m_temp.data(), lda, (MKLRTYPE*)m_singularValues.data(), u, ldu, vt, ldvt, superb.data()); \ - if (computeV()) m_matrixV = localV.adjoint(); \ - /* for(int i=0;i -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SIMPLICIAL_CHOLESKY_H -#define EIGEN_SIMPLICIAL_CHOLESKY_H - -namespace Eigen { - -enum SimplicialCholeskyMode { - SimplicialCholeskyLLT, - SimplicialCholeskyLDLT -}; - -/** \ingroup SparseCholesky_Module - * \brief A direct sparse Cholesky factorizations - * - * These classes provide LL^T and LDL^T Cholesky factorizations of sparse matrices that are - * selfadjoint and positive definite. The factorization allows for solving A.X = B where - * X and B can be either dense or sparse. - * - * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization - * such that the factorized matrix is P A P^-1. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * - */ -template -class SimplicialCholeskyBase : internal::noncopyable -{ - public: - typedef typename internal::traits::MatrixType MatrixType; - typedef typename internal::traits::OrderingType OrderingType; - enum { UpLo = internal::traits::UpLo }; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix CholMatrixType; - typedef Matrix VectorType; - - public: - - /** Default constructor */ - SimplicialCholeskyBase() - : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1) - {} - - SimplicialCholeskyBase(const MatrixType& matrix) - : m_info(Success), m_isInitialized(false), m_shiftOffset(0), m_shiftScale(1) - { - derived().compute(matrix); - } - - ~SimplicialCholeskyBase() - { - } - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } - - inline Index cols() const { return m_matrix.cols(); } - inline Index 
rows() const { return m_matrix.rows(); } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. - */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized."); - eigen_assert(rows()==b.rows() - && "SimplicialCholeskyBase::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "Simplicial LLT or LDLT is not initialized."); - eigen_assert(rows()==b.rows() - && "SimplicialCholesky::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** \returns the permutation P - * \sa permutationPinv() */ - const PermutationMatrix& permutationP() const - { return m_P; } - - /** \returns the inverse P^-1 of the permutation P - * \sa permutationP() */ - const PermutationMatrix& permutationPinv() const - { return m_Pinv; } - - /** Sets the shift parameters that will be used to adjust the diagonal coefficients during the numerical factorization. - * - * During the numerical factorization, the diagonal coefficients are transformed by the following linear model:\n - * \c d_ii = \a offset + \a scale * \c d_ii - * - * The default is the identity transformation with \a offset=0, and \a scale=1. 
- * - * \returns a reference to \c *this. - */ - Derived& setShift(const RealScalar& offset, const RealScalar& scale = 1) - { - m_shiftOffset = offset; - m_shiftScale = scale; - return derived(); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal */ - template - void dumpMemory(Stream& s) - { - int total = 0; - s << " L: " << ((total+=(m_matrix.cols()+1) * sizeof(int) + m_matrix.nonZeros()*(sizeof(int)+sizeof(Scalar))) >> 20) << "Mb" << "\n"; - s << " diag: " << ((total+=m_diag.size() * sizeof(Scalar)) >> 20) << "Mb" << "\n"; - s << " tree: " << ((total+=m_parent.size() * sizeof(int)) >> 20) << "Mb" << "\n"; - s << " nonzeros: " << ((total+=m_nonZerosPerCol.size() * sizeof(int)) >> 20) << "Mb" << "\n"; - s << " perm: " << ((total+=m_P.size() * sizeof(int)) >> 20) << "Mb" << "\n"; - s << " perm^-1: " << ((total+=m_Pinv.size() * sizeof(int)) >> 20) << "Mb" << "\n"; - s << " TOTAL: " << (total>> 20) << "Mb" << "\n"; - } - - /** \internal */ - template - void _solve(const MatrixBase &b, MatrixBase &dest) const - { - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); - eigen_assert(m_matrix.rows()==b.rows()); - - if(m_info!=Success) - return; - - if(m_P.size()>0) - dest = m_P * b; - else - dest = b; - - if(m_matrix.nonZeros()>0) // otherwise L==I - derived().matrixL().solveInPlace(dest); - - if(m_diag.size()>0) - dest = m_diag.asDiagonal().inverse() * dest; - - if (m_matrix.nonZeros()>0) // otherwise U==I - derived().matrixU().solveInPlace(dest); - - if(m_P.size()>0) - dest = m_Pinv * dest; - } - -#endif // EIGEN_PARSED_BY_DOXYGEN - - protected: - - /** Computes the sparse Cholesky decomposition of \a matrix */ - template - void compute(const MatrixType& matrix) - { - eigen_assert(matrix.rows()==matrix.cols()); - Index size = matrix.cols(); - CholMatrixType ap(size,size); - ordering(matrix, ap); - analyzePattern_preordered(ap, DoLDLT); - factorize_preordered(ap); 
- } - - template - void factorize(const MatrixType& a) - { - eigen_assert(a.rows()==a.cols()); - int size = a.cols(); - CholMatrixType ap(size,size); - ap.template selfadjointView() = a.template selfadjointView().twistedBy(m_P); - factorize_preordered(ap); - } - - template - void factorize_preordered(const CholMatrixType& a); - - void analyzePattern(const MatrixType& a, bool doLDLT) - { - eigen_assert(a.rows()==a.cols()); - int size = a.cols(); - CholMatrixType ap(size,size); - ordering(a, ap); - analyzePattern_preordered(ap,doLDLT); - } - void analyzePattern_preordered(const CholMatrixType& a, bool doLDLT); - - void ordering(const MatrixType& a, CholMatrixType& ap); - - /** keeps off-diagonal entries; drops diagonal entries */ - struct keep_diag { - inline bool operator() (const Index& row, const Index& col, const Scalar&) const - { - return row!=col; - } - }; - - mutable ComputationInfo m_info; - bool m_isInitialized; - bool m_factorizationIsOk; - bool m_analysisIsOk; - - CholMatrixType m_matrix; - VectorType m_diag; // the diagonal coefficients (LDLT mode) - VectorXi m_parent; // elimination tree - VectorXi m_nonZerosPerCol; - PermutationMatrix m_P; // the permutation - PermutationMatrix m_Pinv; // the inverse permutation - - RealScalar m_shiftOffset; - RealScalar m_shiftScale; -}; - -template > class SimplicialLLT; -template > class SimplicialLDLT; -template > class SimplicialCholesky; - -namespace internal { - -template struct traits > -{ - typedef _MatrixType MatrixType; - typedef _Ordering OrderingType; - enum { UpLo = _UpLo }; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix CholMatrixType; - typedef SparseTriangularView MatrixL; - typedef SparseTriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } -}; - -template struct traits > -{ - typedef _MatrixType MatrixType; - typedef _Ordering 
OrderingType; - enum { UpLo = _UpLo }; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix CholMatrixType; - typedef SparseTriangularView MatrixL; - typedef SparseTriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } -}; - -template struct traits > -{ - typedef _MatrixType MatrixType; - typedef _Ordering OrderingType; - enum { UpLo = _UpLo }; -}; - -} - -/** \ingroup SparseCholesky_Module - * \class SimplicialLLT - * \brief A direct sparse LLT Cholesky factorizations - * - * This class provides a LL^T Cholesky factorizations of sparse matrices that are - * selfadjoint and positive definite. The factorization allows for solving A.X = B where - * X and B can be either dense or sparse. - * - * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization - * such that the factorized matrix is P A P^-1. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. 
Default is AMDOrdering<> - * - * \sa class SimplicialLDLT, class AMDOrdering, class NaturalOrdering - */ -template - class SimplicialLLT : public SimplicialCholeskyBase > -{ -public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; - typedef SimplicialCholeskyBase Base; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix CholMatrixType; - typedef Matrix VectorType; - typedef internal::traits Traits; - typedef typename Traits::MatrixL MatrixL; - typedef typename Traits::MatrixU MatrixU; -public: - /** Default constructor */ - SimplicialLLT() : Base() {} - /** Constructs and performs the LLT factorization of \a matrix */ - SimplicialLLT(const MatrixType& matrix) - : Base(matrix) {} - - /** \returns an expression of the factor L */ - inline const MatrixL matrixL() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized"); - return Traits::getL(Base::m_matrix); - } - - /** \returns an expression of the factor U (= L^*) */ - inline const MatrixU matrixU() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial LLT not factorized"); - return Traits::getU(Base::m_matrix); - } - - /** Computes the sparse Cholesky decomposition of \a matrix */ - SimplicialLLT& compute(const MatrixType& matrix) - { - Base::template compute(matrix); - return *this; - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. - * - * \sa factorize() - */ - void analyzePattern(const MatrixType& a) - { - Base::analyzePattern(a, false); - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. 
- * - * \sa analyzePattern() - */ - void factorize(const MatrixType& a) - { - Base::template factorize(a); - } - - /** \returns the determinant of the underlying matrix from the current factorization */ - Scalar determinant() const - { - Scalar detL = Base::m_matrix.diagonal().prod(); - return numext::abs2(detL); - } -}; - -/** \ingroup SparseCholesky_Module - * \class SimplicialLDLT - * \brief A direct sparse LDLT Cholesky factorizations without square root. - * - * This class provides a LDL^T Cholesky factorizations without square root of sparse matrices that are - * selfadjoint and positive definite. The factorization allows for solving A.X = B where - * X and B can be either dense or sparse. - * - * In order to reduce the fill-in, a symmetric permutation P is applied prior to the factorization - * such that the factorized matrix is P A P^-1. - * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower - * or Upper. Default is Lower. - * \tparam _Ordering The ordering method to use, either AMDOrdering<> or NaturalOrdering<>. 
Default is AMDOrdering<> - * - * \sa class SimplicialLLT, class AMDOrdering, class NaturalOrdering - */ -template - class SimplicialLDLT : public SimplicialCholeskyBase > -{ -public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; - typedef SimplicialCholeskyBase Base; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix CholMatrixType; - typedef Matrix VectorType; - typedef internal::traits Traits; - typedef typename Traits::MatrixL MatrixL; - typedef typename Traits::MatrixU MatrixU; -public: - /** Default constructor */ - SimplicialLDLT() : Base() {} - - /** Constructs and performs the LLT factorization of \a matrix */ - SimplicialLDLT(const MatrixType& matrix) - : Base(matrix) {} - - /** \returns a vector expression of the diagonal D */ - inline const VectorType vectorD() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized"); - return Base::m_diag; - } - /** \returns an expression of the factor L */ - inline const MatrixL matrixL() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized"); - return Traits::getL(Base::m_matrix); - } - - /** \returns an expression of the factor U (= L^*) */ - inline const MatrixU matrixU() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial LDLT not factorized"); - return Traits::getU(Base::m_matrix); - } - - /** Computes the sparse Cholesky decomposition of \a matrix */ - SimplicialLDLT& compute(const MatrixType& matrix) - { - Base::template compute(matrix); - return *this; - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. 
- * - * \sa factorize() - */ - void analyzePattern(const MatrixType& a) - { - Base::analyzePattern(a, true); - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. - * - * \sa analyzePattern() - */ - void factorize(const MatrixType& a) - { - Base::template factorize(a); - } - - /** \returns the determinant of the underlying matrix from the current factorization */ - Scalar determinant() const - { - return Base::m_diag.prod(); - } -}; - -/** \deprecated use SimplicialLDLT or class SimplicialLLT - * \ingroup SparseCholesky_Module - * \class SimplicialCholesky - * - * \sa class SimplicialLDLT, class SimplicialLLT - */ -template - class SimplicialCholesky : public SimplicialCholeskyBase > -{ -public: - typedef _MatrixType MatrixType; - enum { UpLo = _UpLo }; - typedef SimplicialCholeskyBase Base; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix CholMatrixType; - typedef Matrix VectorType; - typedef internal::traits Traits; - typedef internal::traits > LDLTTraits; - typedef internal::traits > LLTTraits; - public: - SimplicialCholesky() : Base(), m_LDLT(true) {} - - SimplicialCholesky(const MatrixType& matrix) - : Base(), m_LDLT(true) - { - compute(matrix); - } - - SimplicialCholesky& setMode(SimplicialCholeskyMode mode) - { - switch(mode) - { - case SimplicialCholeskyLLT: - m_LDLT = false; - break; - case SimplicialCholeskyLDLT: - m_LDLT = true; - break; - default: - break; - } - - return *this; - } - - inline const VectorType vectorD() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial Cholesky not factorized"); - return Base::m_diag; - } - inline const CholMatrixType rawMatrix() const { - eigen_assert(Base::m_factorizationIsOk && "Simplicial Cholesky not factorized"); - return Base::m_matrix; - } - - /** Computes 
the sparse Cholesky decomposition of \a matrix */ - SimplicialCholesky& compute(const MatrixType& matrix) - { - if(m_LDLT) - Base::template compute(matrix); - else - Base::template compute(matrix); - return *this; - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. - * - * \sa factorize() - */ - void analyzePattern(const MatrixType& a) - { - Base::analyzePattern(a, m_LDLT); - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. - * - * \sa analyzePattern() - */ - void factorize(const MatrixType& a) - { - if(m_LDLT) - Base::template factorize(a); - else - Base::template factorize(a); - } - - /** \internal */ - template - void _solve(const MatrixBase &b, MatrixBase &dest) const - { - eigen_assert(Base::m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); - eigen_assert(Base::m_matrix.rows()==b.rows()); - - if(Base::m_info!=Success) - return; - - if(Base::m_P.size()>0) - dest = Base::m_P * b; - else - dest = b; - - if(Base::m_matrix.nonZeros()>0) // otherwise L==I - { - if(m_LDLT) - LDLTTraits::getL(Base::m_matrix).solveInPlace(dest); - else - LLTTraits::getL(Base::m_matrix).solveInPlace(dest); - } - - if(Base::m_diag.size()>0) - dest = Base::m_diag.asDiagonal().inverse() * dest; - - if (Base::m_matrix.nonZeros()>0) // otherwise I==I - { - if(m_LDLT) - LDLTTraits::getU(Base::m_matrix).solveInPlace(dest); - else - LLTTraits::getU(Base::m_matrix).solveInPlace(dest); - } - - if(Base::m_P.size()>0) - dest = Base::m_Pinv * dest; - } - - Scalar determinant() const - { - if(m_LDLT) - { - return Base::m_diag.prod(); - } - else - { - Scalar detL = Diagonal(Base::m_matrix).prod(); - return numext::abs2(detL); - } - } - - protected: - 
bool m_LDLT; -}; - -template -void SimplicialCholeskyBase::ordering(const MatrixType& a, CholMatrixType& ap) -{ - eigen_assert(a.rows()==a.cols()); - const Index size = a.rows(); - // Note that amd compute the inverse permutation - { - CholMatrixType C; - C = a.template selfadjointView(); - - OrderingType ordering; - ordering(C,m_Pinv); - } - - if(m_Pinv.size()>0) - m_P = m_Pinv.inverse(); - else - m_P.resize(0); - - ap.resize(size,size); - ap.template selfadjointView() = a.template selfadjointView().twistedBy(m_P); -} - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef SimplicialCholeskyBase Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec().derived()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef SimplicialCholeskyBase Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SIMPLICIAL_CHOLESKY_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h index 7aaf702be..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h @@ -1,199 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2012 Gael Guennebaud - -/* - -NOTE: thes functions vave been adapted from the LDL library: - -LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved. - -LDL License: - - Your use or distribution of LDL or any modified version of - LDL implies that you agree to this License. 
- - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 - USA - - Permission is hereby granted to use or copy this program under the - terms of the GNU LGPL, provided that the Copyright, this License, - and the Availability of the original version is retained on all copies. - User documentation of any code that uses this code or any modified - version of this code must cite the Copyright, this License, the - Availability note, and "Used by permission." Permission to modify - the code and to distribute modified code is granted, provided the - Copyright, this License, and the Availability note are retained, - and a notice that the code was modified is included. 
- */ - -#include "../Core/util/NonMPL2.h" - -#ifndef EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H -#define EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H - -namespace Eigen { - -template -void SimplicialCholeskyBase::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT) -{ - const Index size = ap.rows(); - m_matrix.resize(size, size); - m_parent.resize(size); - m_nonZerosPerCol.resize(size); - - ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0); - - for(Index k = 0; k < size; ++k) - { - /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */ - m_parent[k] = -1; /* parent of k is not yet known */ - tags[k] = k; /* mark node k as visited */ - m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */ - for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it) - { - Index i = it.index(); - if(i < k) - { - /* follow path from i to root of etree, stop at flagged node */ - for(; tags[i] != k; i = m_parent[i]) - { - /* find parent of i if not yet determined */ - if (m_parent[i] == -1) - m_parent[i] = k; - m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */ - tags[i] = k; /* mark i as visited */ - } - } - } - } - - /* construct Lp index array from m_nonZerosPerCol column counts */ - Index* Lp = m_matrix.outerIndexPtr(); - Lp[0] = 0; - for(Index k = 0; k < size; ++k) - Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 
0 : 1); - - m_matrix.resizeNonZeros(Lp[size]); - - m_isInitialized = true; - m_info = Success; - m_analysisIsOk = true; - m_factorizationIsOk = false; -} - - -template -template -void SimplicialCholeskyBase::factorize_preordered(const CholMatrixType& ap) -{ - using std::sqrt; - - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - eigen_assert(ap.rows()==ap.cols()); - const Index size = ap.rows(); - eigen_assert(m_parent.size()==size); - eigen_assert(m_nonZerosPerCol.size()==size); - - const Index* Lp = m_matrix.outerIndexPtr(); - Index* Li = m_matrix.innerIndexPtr(); - Scalar* Lx = m_matrix.valuePtr(); - - ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0); - ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0); - ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0); - - bool ok = true; - m_diag.resize(DoLDLT ? size : 0); - - for(Index k = 0; k < size; ++k) - { - // compute nonzero pattern of kth row of L, in topological order - y[k] = 0.0; // Y(0:k) is now all zero - Index top = size; // stack for pattern is empty - tags[k] = k; // mark node k as visited - m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L - for(typename MatrixType::InnerIterator it(ap,k); it; ++it) - { - Index i = it.index(); - if(i <= k) - { - y[i] += numext::conj(it.value()); /* scatter A(i,k) into Y (sum duplicates) */ - Index len; - for(len = 0; tags[i] != k; i = m_parent[i]) - { - pattern[len++] = i; /* L(k,i) is nonzero */ - tags[i] = k; /* mark i as visited */ - } - while(len > 0) - pattern[--top] = pattern[--len]; - } - } - - /* compute numerical values kth row of L (a sparse triangular solve) */ - - RealScalar d = numext::real(y[k]) * m_shiftScale + m_shiftOffset; // get D(k,k), apply the shift function, and clear Y(k) - y[k] = 0.0; - for(; top < size; ++top) - { - Index i = pattern[top]; /* pattern[top:n-1] is pattern of L(:,k) */ - Scalar yi = y[i]; /* get and clear Y(i) */ - y[i] = 0.0; - - /* the 
nonzero entry L(k,i) */ - Scalar l_ki; - if(DoLDLT) - l_ki = yi / m_diag[i]; - else - yi = l_ki = yi / Lx[Lp[i]]; - - Index p2 = Lp[i] + m_nonZerosPerCol[i]; - Index p; - for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) - y[Li[p]] -= numext::conj(Lx[p]) * yi; - d -= numext::real(l_ki * numext::conj(yi)); - Li[p] = k; /* store L(k,i) in column form of L */ - Lx[p] = l_ki; - ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */ - } - if(DoLDLT) - { - m_diag[k] = d; - if(d == RealScalar(0)) - { - ok = false; /* failure, D(k,k) is zero */ - break; - } - } - else - { - Index p = Lp[k] + m_nonZerosPerCol[k]++; - Li[p] = k ; /* store L(k,k) = sqrt (d) in column k */ - if(d <= RealScalar(0)) { - ok = false; /* failure, matrix is not positive definite */ - break; - } - Lx[p] = sqrt(d) ; - } - } - - m_info = ok ? Success : NumericalIssue; - m_factorizationIsOk = true; -} - -} // end namespace Eigen - -#endif // EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/AmbiVector.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/AmbiVector.h index 220c6451c..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/AmbiVector.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/AmbiVector.h @@ -1,373 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_AMBIVECTOR_H -#define EIGEN_AMBIVECTOR_H - -namespace Eigen { - -namespace internal { - -/** \internal - * Hybrid sparse/dense vector class designed for intensive read-write operations. - * - * See BasicSparseLLT and SparseProduct for usage examples. 
- */ -template -class AmbiVector -{ - public: - typedef _Scalar Scalar; - typedef _Index Index; - typedef typename NumTraits::Real RealScalar; - - AmbiVector(Index size) - : m_buffer(0), m_zero(0), m_size(0), m_allocatedSize(0), m_allocatedElements(0), m_mode(-1) - { - resize(size); - } - - void init(double estimatedDensity); - void init(int mode); - - Index nonZeros() const; - - /** Specifies a sub-vector to work on */ - void setBounds(Index start, Index end) { m_start = start; m_end = end; } - - void setZero(); - - void restart(); - Scalar& coeffRef(Index i); - Scalar& coeff(Index i); - - class Iterator; - - ~AmbiVector() { delete[] m_buffer; } - - void resize(Index size) - { - if (m_allocatedSize < size) - reallocate(size); - m_size = size; - } - - Index size() const { return m_size; } - - protected: - - void reallocate(Index size) - { - // if the size of the matrix is not too large, let's allocate a bit more than needed such - // that we can handle dense vector even in sparse mode. - delete[] m_buffer; - if (size<1000) - { - Index allocSize = (size * sizeof(ListEl) + sizeof(Scalar) - 1)/sizeof(Scalar); - m_allocatedElements = (allocSize*sizeof(Scalar))/sizeof(ListEl); - m_buffer = new Scalar[allocSize]; - } - else - { - m_allocatedElements = (size*sizeof(Scalar))/sizeof(ListEl); - m_buffer = new Scalar[size]; - } - m_size = size; - m_start = 0; - m_end = m_size; - } - - void reallocateSparse() - { - Index copyElements = m_allocatedElements; - m_allocatedElements = (std::min)(Index(m_allocatedElements*1.5),m_size); - Index allocSize = m_allocatedElements * sizeof(ListEl); - allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); - Scalar* newBuffer = new Scalar[allocSize]; - memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); - delete[] m_buffer; - m_buffer = newBuffer; - } - - protected: - // element type of the linked list - struct ListEl - { - Index next; - Index index; - Scalar value; - }; - - // used to store data in both mode - Scalar* 
m_buffer; - Scalar m_zero; - Index m_size; - Index m_start; - Index m_end; - Index m_allocatedSize; - Index m_allocatedElements; - Index m_mode; - - // linked list mode - Index m_llStart; - Index m_llCurrent; - Index m_llSize; -}; - -/** \returns the number of non zeros in the current sub vector */ -template -_Index AmbiVector<_Scalar,_Index>::nonZeros() const -{ - if (m_mode==IsSparse) - return m_llSize; - else - return m_end - m_start; -} - -template -void AmbiVector<_Scalar,_Index>::init(double estimatedDensity) -{ - if (estimatedDensity>0.1) - init(IsDense); - else - init(IsSparse); -} - -template -void AmbiVector<_Scalar,_Index>::init(int mode) -{ - m_mode = mode; - if (m_mode==IsSparse) - { - m_llSize = 0; - m_llStart = -1; - } -} - -/** Must be called whenever we might perform a write access - * with an index smaller than the previous one. - * - * Don't worry, this function is extremely cheap. - */ -template -void AmbiVector<_Scalar,_Index>::restart() -{ - m_llCurrent = m_llStart; -} - -/** Set all coefficients of current subvector to zero */ -template -void AmbiVector<_Scalar,_Index>::setZero() -{ - if (m_mode==IsDense) - { - for (Index i=m_start; i -_Scalar& AmbiVector<_Scalar,_Index>::coeffRef(_Index i) -{ - if (m_mode==IsDense) - return m_buffer[i]; - else - { - ListEl* EIGEN_RESTRICT llElements = reinterpret_cast(m_buffer); - // TODO factorize the following code to reduce code generation - eigen_assert(m_mode==IsSparse); - if (m_llSize==0) - { - // this is the first element - m_llStart = 0; - m_llCurrent = 0; - ++m_llSize; - llElements[0].value = Scalar(0); - llElements[0].index = i; - llElements[0].next = -1; - return llElements[0].value; - } - else if (i=llElements[m_llCurrent].index && "you must call restart() before inserting an element with lower or equal index"); - while (nextel >= 0 && llElements[nextel].index<=i) - { - m_llCurrent = nextel; - nextel = llElements[nextel].next; - } - - if (llElements[m_llCurrent].index==i) - { - // the coefficient 
already exists and we found it ! - return llElements[m_llCurrent].value; - } - else - { - if (m_llSize>=m_allocatedElements) - { - reallocateSparse(); - llElements = reinterpret_cast(m_buffer); - } - eigen_internal_assert(m_llSize -_Scalar& AmbiVector<_Scalar,_Index>::coeff(_Index i) -{ - if (m_mode==IsDense) - return m_buffer[i]; - else - { - ListEl* EIGEN_RESTRICT llElements = reinterpret_cast(m_buffer); - eigen_assert(m_mode==IsSparse); - if ((m_llSize==0) || (i= 0 && llElements[elid].index -class AmbiVector<_Scalar,_Index>::Iterator -{ - public: - typedef _Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - - /** Default constructor - * \param vec the vector on which we iterate - * \param epsilon the minimal value used to prune zero coefficients. - * In practice, all coefficients having a magnitude smaller than \a epsilon - * are skipped. - */ - Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0) - : m_vector(vec) - { - using std::abs; - m_epsilon = epsilon; - m_isDense = m_vector.m_mode==IsDense; - if (m_isDense) - { - m_currentEl = 0; // this is to avoid a compilation warning - m_cachedValue = 0; // this is to avoid a compilation warning - m_cachedIndex = m_vector.m_start-1; - ++(*this); - } - else - { - ListEl* EIGEN_RESTRICT llElements = reinterpret_cast(m_vector.m_buffer); - m_currentEl = m_vector.m_llStart; - while (m_currentEl>=0 && abs(llElements[m_currentEl].value)<=m_epsilon) - m_currentEl = llElements[m_currentEl].next; - if (m_currentEl<0) - { - m_cachedValue = 0; // this is to avoid a compilation warning - m_cachedIndex = -1; - } - else - { - m_cachedIndex = llElements[m_currentEl].index; - m_cachedValue = llElements[m_currentEl].value; - } - } - } - - Index index() const { return m_cachedIndex; } - Scalar value() const { return m_cachedValue; } - - operator bool() const { return m_cachedIndex>=0; } - - Iterator& operator++() - { - using std::abs; - if (m_isDense) - { - do { - ++m_cachedIndex; - } while 
(m_cachedIndex(m_vector.m_buffer); - do { - m_currentEl = llElements[m_currentEl].next; - } while (m_currentEl>=0 && abs(llElements[m_currentEl].value) -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_COMPRESSED_STORAGE_H -#define EIGEN_COMPRESSED_STORAGE_H - -namespace Eigen { - -namespace internal { - -/** \internal - * Stores a sparse set of values as a list of values and a list of indices. - * - */ -template -class CompressedStorage -{ - public: - - typedef _Scalar Scalar; - typedef _Index Index; - - protected: - - typedef typename NumTraits::Real RealScalar; - - public: - - CompressedStorage() - : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0) - {} - - CompressedStorage(size_t size) - : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0) - { - resize(size); - } - - CompressedStorage(const CompressedStorage& other) - : m_values(0), m_indices(0), m_size(0), m_allocatedSize(0) - { - *this = other; - } - - CompressedStorage& operator=(const CompressedStorage& other) - { - resize(other.size()); - internal::smart_copy(other.m_values, other.m_values + m_size, m_values); - internal::smart_copy(other.m_indices, other.m_indices + m_size, m_indices); - return *this; - } - - void swap(CompressedStorage& other) - { - std::swap(m_values, other.m_values); - std::swap(m_indices, other.m_indices); - std::swap(m_size, other.m_size); - std::swap(m_allocatedSize, other.m_allocatedSize); - } - - ~CompressedStorage() - { - delete[] m_values; - delete[] m_indices; - } - - void reserve(size_t size) - { - size_t newAllocatedSize = m_size + size; - if (newAllocatedSize > m_allocatedSize) - reallocate(newAllocatedSize); - } - - void squeeze() - { - if (m_allocatedSize>m_size) - reallocate(m_size); - } - - void resize(size_t size, double reserveSizeFactor = 0) - { - if (m_allocatedSize(m_size); - 
resize(m_size+1, 1); - m_values[id] = v; - m_indices[id] = i; - } - - inline size_t size() const { return m_size; } - inline size_t allocatedSize() const { return m_allocatedSize; } - inline void clear() { m_size = 0; } - - const Scalar* valuePtr() const { return m_values; } - Scalar* valuePtr() { return m_values; } - const Index* indexPtr() const { return m_indices; } - Index* indexPtr() { return m_indices; } - - inline Scalar& value(size_t i) { return m_values[i]; } - inline const Scalar& value(size_t i) const { return m_values[i]; } - - inline Index& index(size_t i) { return m_indices[i]; } - inline const Index& index(size_t i) const { return m_indices[i]; } - - static CompressedStorage Map(Index* indices, Scalar* values, size_t size) - { - CompressedStorage res; - res.m_indices = indices; - res.m_values = values; - res.m_allocatedSize = res.m_size = size; - return res; - } - - /** \returns the largest \c k such that for all \c j in [0,k) index[\c j]\<\a key */ - inline Index searchLowerIndex(Index key) const - { - return searchLowerIndex(0, m_size, key); - } - - /** \returns the largest \c k in [start,end) such that for all \c j in [start,k) index[\c j]\<\a key */ - inline Index searchLowerIndex(size_t start, size_t end, Index key) const - { - while(end>start) - { - size_t mid = (end+start)>>1; - if (m_indices[mid](start); - } - - /** \returns the stored value at index \a key - * If the value does not exist, then the value \a defaultValue is returned without any insertion. 
*/ - inline Scalar at(Index key, const Scalar& defaultValue = Scalar(0)) const - { - if (m_size==0) - return defaultValue; - else if (key==m_indices[m_size-1]) - return m_values[m_size-1]; - // ^^ optimization: let's first check if it is the last coefficient - // (very common in high level algorithms) - const size_t id = searchLowerIndex(0,m_size-1,key); - return ((id=end) - return Scalar(0); - else if (end>start && key==m_indices[end-1]) - return m_values[end-1]; - // ^^ optimization: let's first check if it is the last coefficient - // (very common in high level algorithms) - const size_t id = searchLowerIndex(start,end-1,key); - return ((id=m_size || m_indices[id]!=key) - { - resize(m_size+1,1); - for (size_t j=m_size-1; j>id; --j) - { - m_indices[j] = m_indices[j-1]; - m_values[j] = m_values[j-1]; - } - m_indices[id] = key; - m_values[id] = defaultValue; - } - return m_values[id]; - } - - void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits::dummy_precision()) - { - size_t k = 0; - size_t n = size(); - for (size_t i=0; i0) { - internal::smart_copy(m_values, m_values+copySize, newValues); - internal::smart_copy(m_indices, m_indices+copySize, newIndices); - } - // delete old stuff - delete[] m_values; - delete[] m_indices; - m_values = newValues; - m_indices = newIndices; - m_allocatedSize = size; - } - - protected: - Scalar* m_values; - Index* m_indices; - size_t m_size; - size_t m_allocatedSize; - -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_COMPRESSED_STORAGE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 5c320e2d2..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -1,245 +0,0 @@ -// This file is part of Eigen, a lightweight C++ 
template library -// for linear algebra. -// -// Copyright (C) 2008-2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H -#define EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H - -namespace Eigen { - -namespace internal { - -template -static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) -{ - typedef typename remove_all::type::Scalar Scalar; - typedef typename remove_all::type::Index Index; - - // make sure to call innerSize/outerSize since we fake the storage order. - Index rows = lhs.innerSize(); - Index cols = rhs.outerSize(); - eigen_assert(lhs.outerSize() == rhs.innerSize()); - - std::vector mask(rows,false); - Matrix values(rows); - Matrix indices(rows); - - // estimate the number of non zero entries - // given a rhs column containing Y non zeros, we assume that the respective Y columns - // of the lhs differs in average of one non zeros, thus the number of non zeros for - // the product of a rhs column with the lhs is X+Y where X is the average number of non zero - // per column of the lhs. - // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) - Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); - - res.setZero(); - res.reserve(Index(estimated_nnz_prod)); - // we compute each column of the result, one after the other - for (Index j=0; j use a quick sort - // otherwise => loop through the entire vector - // In order to avoid to perform an expensive log2 when the - // result is clearly very sparse we use a linear bound up to 200. - //if((nnz<200 && nnz1) std::sort(indices.data(),indices.data()+nnz); - for(Index k=0; k::Flags&RowMajorBit) ? RowMajor : ColMajor, - int RhsStorageOrder = (traits::Flags&RowMajorBit) ? 
RowMajor : ColMajor, - int ResStorageOrder = (traits::Flags&RowMajorBit) ? RowMajor : ColMajor> -struct conservative_sparse_sparse_product_selector; - -template -struct conservative_sparse_sparse_product_selector -{ - typedef typename remove_all::type LhsCleaned; - typedef typename LhsCleaned::Scalar Scalar; - - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix RowMajorMatrix; - typedef SparseMatrix ColMajorMatrix; - ColMajorMatrix resCol(lhs.rows(),rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); - // sort the non zeros: - RowMajorMatrix resRow(resCol); - res = resRow; - } -}; - -template -struct conservative_sparse_sparse_product_selector -{ - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix RowMajorMatrix; - RowMajorMatrix rhsRow = rhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhsRow, lhs, resRow); - res = resRow; - } -}; - -template -struct conservative_sparse_sparse_product_selector -{ - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix RowMajorMatrix; - RowMajorMatrix lhsRow = lhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhsRow, resRow); - res = resRow; - } -}; - -template -struct conservative_sparse_sparse_product_selector -{ - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix RowMajorMatrix; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); - res = resRow; - } -}; - - -template -struct conservative_sparse_sparse_product_selector -{ - typedef typename traits::type>::Scalar Scalar; - - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix ColMajorMatrix; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - 
internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol); - res = resCol; - } -}; - -template -struct conservative_sparse_sparse_product_selector -{ - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix ColMajorMatrix; - ColMajorMatrix lhsCol = lhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhsCol, rhs, resCol); - res = resCol; - } -}; - -template -struct conservative_sparse_sparse_product_selector -{ - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix ColMajorMatrix; - ColMajorMatrix rhsCol = rhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl(lhs, rhsCol, resCol); - res = resCol; - } -}; - -template -struct conservative_sparse_sparse_product_selector -{ - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) - { - typedef SparseMatrix RowMajorMatrix; - typedef SparseMatrix ColMajorMatrix; - RowMajorMatrix resRow(lhs.rows(),rhs.cols()); - internal::conservative_sparse_sparse_product_impl(rhs, lhs, resRow); - // sort the non zeros: - ColMajorMatrix resCol(resRow); - res = resCol; - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CONSERVATIVESPARSESPARSEPRODUCT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/MappedSparseMatrix.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/MappedSparseMatrix.h index ab1a266a9..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/MappedSparseMatrix.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/MappedSparseMatrix.h @@ -1,181 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_MAPPED_SPARSEMATRIX_H -#define EIGEN_MAPPED_SPARSEMATRIX_H - -namespace Eigen { - -/** \class MappedSparseMatrix - * - * \brief Sparse matrix - * - * \param _Scalar the scalar type, i.e. the type of the coefficients - * - * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme. - * - */ -namespace internal { -template -struct traits > : traits > -{}; -} - -template -class MappedSparseMatrix - : public SparseMatrixBase > -{ - public: - EIGEN_SPARSE_PUBLIC_INTERFACE(MappedSparseMatrix) - enum { IsRowMajor = Base::IsRowMajor }; - - protected: - - Index m_outerSize; - Index m_innerSize; - Index m_nnz; - Index* m_outerIndex; - Index* m_innerIndices; - Scalar* m_values; - - public: - - inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; } - inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; } - inline Index innerSize() const { return m_innerSize; } - inline Index outerSize() const { return m_outerSize; } - - bool isCompressed() const { return true; } - - //---------------------------------------- - // direct access interface - inline const Scalar* valuePtr() const { return m_values; } - inline Scalar* valuePtr() { return m_values; } - - inline const Index* innerIndexPtr() const { return m_innerIndices; } - inline Index* innerIndexPtr() { return m_innerIndices; } - - inline const Index* outerIndexPtr() const { return m_outerIndex; } - inline Index* outerIndexPtr() { return m_outerIndex; } - //---------------------------------------- - - inline Scalar coeff(Index row, Index col) const - { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - Index start = m_outerIndex[outer]; - Index end = m_outerIndex[outer+1]; - if (start==end) - return Scalar(0); - else if (end>0 && inner==m_innerIndices[end-1]) - return m_values[end-1]; - // ^^ optimization: let's first check if it is the last coefficient - // (very common in high level algorithms) - - const Index* r = std::lower_bound(&m_innerIndices[start],&m_innerIndices[end-1],inner); - const Index id = r-&m_innerIndices[0]; - return ((*r==inner) && (id=start && "you probably called coeffRef on a non finalized matrix"); - eigen_assert(end>start && "coeffRef cannot be called on a zero coefficient"); - Index* r = std::lower_bound(&m_innerIndices[start],&m_innerIndices[end],inner); - const Index id = r-&m_innerIndices[0]; - eigen_assert((*r==inner) && (id -class MappedSparseMatrix::InnerIterator -{ - public: - InnerIterator(const MappedSparseMatrix& mat, Index outer) - : m_matrix(mat), - m_outer(outer), - m_id(mat.outerIndexPtr()[outer]), - m_start(m_id), - m_end(mat.outerIndexPtr()[outer+1]) - {} - - inline InnerIterator& operator++() { m_id++; return *this; } - - inline Scalar value() const { return m_matrix.valuePtr()[m_id]; } - inline Scalar& valueRef() { return const_cast(m_matrix.valuePtr()[m_id]); } - - inline Index index() const { return m_matrix.innerIndexPtr()[m_id]; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? 
index() : m_outer; } - - inline operator bool() const { return (m_id < m_end) && (m_id>=m_start); } - - protected: - const MappedSparseMatrix& m_matrix; - const Index m_outer; - Index m_id; - const Index m_start; - const Index m_end; -}; - -template -class MappedSparseMatrix::ReverseInnerIterator -{ - public: - ReverseInnerIterator(const MappedSparseMatrix& mat, Index outer) - : m_matrix(mat), - m_outer(outer), - m_id(mat.outerIndexPtr()[outer+1]), - m_start(mat.outerIndexPtr()[outer]), - m_end(m_id) - {} - - inline ReverseInnerIterator& operator--() { m_id--; return *this; } - - inline Scalar value() const { return m_matrix.valuePtr()[m_id-1]; } - inline Scalar& valueRef() { return const_cast(m_matrix.valuePtr()[m_id-1]); } - - inline Index index() const { return m_matrix.innerIndexPtr()[m_id-1]; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - inline operator bool() const { return (m_id <= m_end) && (m_id>m_start); } - - protected: - const MappedSparseMatrix& m_matrix; - const Index m_outer; - Index m_id; - const Index m_start; - const Index m_end; -}; - -} // end namespace Eigen - -#endif // EIGEN_MAPPED_SPARSEMATRIX_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseBlock.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseBlock.h index 99886079d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseBlock.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseBlock.h @@ -1,623 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSE_BLOCK_H -#define EIGEN_SPARSE_BLOCK_H - -namespace Eigen { - -template -class BlockImpl - : public SparseMatrixBase > -{ -public: - typedef Block BlockType; - enum { IsRowMajor = internal::traits::IsRowMajor }; -protected: - typedef typename internal::remove_all::type _MatrixTypeNested; - enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; -public: - EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) - - class InnerIterator: public XprType::InnerIterator - { - typedef typename BlockImpl::Index Index; - public: - inline InnerIterator(const Block& xpr, Index outer) - : XprType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? this->index() : m_outer; } - protected: - Index m_outer; - }; - class ReverseInnerIterator: public XprType::ReverseInnerIterator - { - typedef typename BlockImpl::Index Index; - public: - inline ReverseInnerIterator(const BlockType& xpr, Index outer) - : XprType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? this->index() : m_outer; } - protected: - Index m_outer; - }; - - inline BlockImpl(const XprType& xpr, int i) - : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize) - {} - - inline BlockImpl(const XprType& xpr, int startRow, int startCol, int blockRows, int blockCols) - : m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? blockRows : blockCols) - {} - - inline const Scalar coeff(int row, int col) const - { - return m_matrix.coeff(row + IsRowMajor ? m_outerStart : 0, col +IsRowMajor ? 0 : m_outerStart); - } - - inline const Scalar coeff(int index) const - { - return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? 
index : m_outerStart); - } - - EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } - - protected: - - typename XprType::Nested m_matrix; - Index m_outerStart; - const internal::variable_if_dynamic m_outerSize; - - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - private: - Index nonZeros() const; -}; - - -/*************************************************************************** -* specialisation for SparseMatrix -***************************************************************************/ - -template -class BlockImpl,BlockRows,BlockCols,true,Sparse> - : public SparseMatrixBase,BlockRows,BlockCols,true> > -{ - typedef SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType; - typedef typename internal::remove_all::type _MatrixTypeNested; - typedef Block ConstBlockType; -public: - typedef Block BlockType; - enum { IsRowMajor = internal::traits::IsRowMajor }; - EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) -protected: - enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; -public: - - class InnerIterator: public SparseMatrixType::InnerIterator - { - public: - inline InnerIterator(const BlockType& xpr, Index outer) - : SparseMatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? this->index() : m_outer; } - protected: - Index m_outer; - }; - class ReverseInnerIterator: public SparseMatrixType::ReverseInnerIterator - { - public: - inline ReverseInnerIterator(const BlockType& xpr, Index outer) - : SparseMatrixType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? 
this->index() : m_outer; } - protected: - Index m_outer; - }; - - inline BlockImpl(const SparseMatrixType& xpr, int i) - : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize) - {} - - inline BlockImpl(const SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols) - : m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? blockRows : blockCols) - {} - - template - inline BlockType& operator=(const SparseMatrixBase& other) - { - typedef typename internal::remove_all::type _NestedMatrixType; - _NestedMatrixType& matrix = const_cast<_NestedMatrixType&>(m_matrix);; - // This assignement is slow if this vector set is not empty - // and/or it is not at the end of the nonzeros of the underlying matrix. - - // 1 - eval to a temporary to avoid transposition and/or aliasing issues - SparseMatrix tmp(other); - - // 2 - let's check whether there is enough allocated memory - Index nnz = tmp.nonZeros(); - Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block - Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]; // ending posiiton of the current block - Index block_size = end - start; // available room in the current block - Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end; - - Index free_size = m_matrix.isCompressed() - ? 
Index(matrix.data().allocatedSize()) + block_size - : block_size; - - if(nnz>free_size) - { - // realloc manually to reduce copies - typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz); - - std::memcpy(newdata.valuePtr(), m_matrix.data().valuePtr(), start*sizeof(Scalar)); - std::memcpy(newdata.indexPtr(), m_matrix.data().indexPtr(), start*sizeof(Index)); - - std::memcpy(newdata.valuePtr() + start, tmp.data().valuePtr(), nnz*sizeof(Scalar)); - std::memcpy(newdata.indexPtr() + start, tmp.data().indexPtr(), nnz*sizeof(Index)); - - std::memcpy(newdata.valuePtr()+start+nnz, matrix.data().valuePtr()+end, tail_size*sizeof(Scalar)); - std::memcpy(newdata.indexPtr()+start+nnz, matrix.data().indexPtr()+end, tail_size*sizeof(Index)); - - newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz); - - matrix.data().swap(newdata); - } - else - { - // no need to realloc, simply copy the tail at its respective position and insert tmp - matrix.data().resize(start + nnz + tail_size); - - std::memmove(matrix.data().valuePtr()+start+nnz, matrix.data().valuePtr()+end, tail_size*sizeof(Scalar)); - std::memmove(matrix.data().indexPtr()+start+nnz, matrix.data().indexPtr()+end, tail_size*sizeof(Index)); - - std::memcpy(matrix.data().valuePtr()+start, tmp.data().valuePtr(), nnz*sizeof(Scalar)); - std::memcpy(matrix.data().indexPtr()+start, tmp.data().indexPtr(), nnz*sizeof(Index)); - } - - // update innerNonZeros - if(!m_matrix.isCompressed()) - for(Index j=0; j(other); - } - - inline const Scalar* valuePtr() const - { return m_matrix.valuePtr() + m_matrix.outerIndexPtr()[m_outerStart]; } - inline Scalar* valuePtr() - { return m_matrix.const_cast_derived().valuePtr() + m_matrix.outerIndexPtr()[m_outerStart]; } - - inline const Index* innerIndexPtr() const - { return m_matrix.innerIndexPtr() + m_matrix.outerIndexPtr()[m_outerStart]; } - inline Index* innerIndexPtr() - { return m_matrix.const_cast_derived().innerIndexPtr() + 
m_matrix.outerIndexPtr()[m_outerStart]; } - - inline const Index* outerIndexPtr() const - { return m_matrix.outerIndexPtr() + m_outerStart; } - inline Index* outerIndexPtr() - { return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; } - - Index nonZeros() const - { - if(m_matrix.isCompressed()) - return std::size_t(m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]) - - std::size_t(m_matrix.outerIndexPtr()[m_outerStart]); - else if(m_outerSize.value()==0) - return 0; - else - return Map >(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum(); - } - - inline Scalar& coeffRef(int row, int col) - { - return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); - } - - inline const Scalar coeff(int row, int col) const - { - return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); - } - - inline const Scalar coeff(int index) const - { - return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); - } - - const Scalar& lastCoeff() const - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(BlockImpl); - eigen_assert(nonZeros()>0); - if(m_matrix.isCompressed()) - return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1]; - else - return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart]+m_matrix.innerNonZeroPtr()[m_outerStart]-1]; - } - - EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? 
m_matrix.cols() : m_outerSize.value(); } - - protected: - - typename SparseMatrixType::Nested m_matrix; - Index m_outerStart; - const internal::variable_if_dynamic m_outerSize; - -}; - - -template -class BlockImpl,BlockRows,BlockCols,true,Sparse> - : public SparseMatrixBase,BlockRows,BlockCols,true> > -{ - typedef SparseMatrix<_Scalar, _Options, _Index> SparseMatrixType; - typedef typename internal::remove_all::type _MatrixTypeNested; -public: - typedef Block BlockType; - enum { IsRowMajor = internal::traits::IsRowMajor }; - EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) -protected: - enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; -public: - - class InnerIterator: public SparseMatrixType::InnerIterator - { - public: - inline InnerIterator(const BlockType& xpr, Index outer) - : SparseMatrixType::InnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? this->index() : m_outer; } - protected: - Index m_outer; - }; - class ReverseInnerIterator: public SparseMatrixType::ReverseInnerIterator - { - public: - inline ReverseInnerIterator(const BlockType& xpr, Index outer) - : SparseMatrixType::ReverseInnerIterator(xpr.m_matrix, xpr.m_outerStart + outer), m_outer(outer) - {} - inline Index row() const { return IsRowMajor ? m_outer : this->index(); } - inline Index col() const { return IsRowMajor ? this->index() : m_outer; } - protected: - Index m_outer; - }; - - inline BlockImpl(const SparseMatrixType& xpr, int i) - : m_matrix(xpr), m_outerStart(i), m_outerSize(OuterSize) - {} - - inline BlockImpl(const SparseMatrixType& xpr, int startRow, int startCol, int blockRows, int blockCols) - : m_matrix(xpr), m_outerStart(IsRowMajor ? startRow : startCol), m_outerSize(IsRowMajor ? 
blockRows : blockCols) - {} - - inline const Scalar* valuePtr() const - { return m_matrix.valuePtr() + m_matrix.outerIndexPtr()[m_outerStart]; } - - inline const Index* innerIndexPtr() const - { return m_matrix.innerIndexPtr() + m_matrix.outerIndexPtr()[m_outerStart]; } - - inline const Index* outerIndexPtr() const - { return m_matrix.outerIndexPtr() + m_outerStart; } - - Index nonZeros() const - { - if(m_matrix.isCompressed()) - return std::size_t(m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()]) - - std::size_t(m_matrix.outerIndexPtr()[m_outerStart]); - else if(m_outerSize.value()==0) - return 0; - else - return Map >(m_matrix.innerNonZeroPtr()+m_outerStart, m_outerSize.value()).sum(); - } - - inline const Scalar coeff(int row, int col) const - { - return m_matrix.coeff(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); - } - - inline const Scalar coeff(int index) const - { - return m_matrix.coeff(IsRowMajor ? m_outerStart : index, IsRowMajor ? index : m_outerStart); - } - - const Scalar& lastCoeff() const - { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(BlockImpl); - eigen_assert(nonZeros()>0); - if(m_matrix.isCompressed()) - return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart+1]-1]; - else - return m_matrix.valuePtr()[m_matrix.outerIndexPtr()[m_outerStart]+m_matrix.innerNonZeroPtr()[m_outerStart]-1]; - } - - EIGEN_STRONG_INLINE Index rows() const { return IsRowMajor ? m_outerSize.value() : m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return IsRowMajor ? m_matrix.cols() : m_outerSize.value(); } - - protected: - - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - - typename SparseMatrixType::Nested m_matrix; - Index m_outerStart; - const internal::variable_if_dynamic m_outerSize; -}; - -//---------- - -/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this - * is col-major (resp. row-major). 
- */ -template -typename SparseMatrixBase::InnerVectorReturnType SparseMatrixBase::innerVector(Index outer) -{ return InnerVectorReturnType(derived(), outer); } - -/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this - * is col-major (resp. row-major). Read-only. - */ -template -const typename SparseMatrixBase::ConstInnerVectorReturnType SparseMatrixBase::innerVector(Index outer) const -{ return ConstInnerVectorReturnType(derived(), outer); } - -/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this - * is col-major (resp. row-major). - */ -template -typename SparseMatrixBase::InnerVectorsReturnType -SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) -{ - return Block(derived(), - IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart, - IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize); - -} - -/** \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this - * is col-major (resp. row-major). Read-only. - */ -template -const typename SparseMatrixBase::ConstInnerVectorsReturnType -SparseMatrixBase::innerVectors(Index outerStart, Index outerSize) const -{ - return Block(derived(), - IsRowMajor ? outerStart : 0, IsRowMajor ? 0 : outerStart, - IsRowMajor ? outerSize : rows(), IsRowMajor ? cols() : outerSize); - -} - -namespace internal { - -template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel, - bool OuterVector = (BlockCols==1 && XprType::IsRowMajor) || (BlockRows==1 && !XprType::IsRowMajor)> -class GenericSparseBlockInnerIteratorImpl; - -} - -/** Generic implementation of sparse Block expression. - * Real-only. 
- */ -template -class BlockImpl - : public SparseMatrixBase >, internal::no_assignment_operator -{ - typedef typename internal::remove_all::type _MatrixTypeNested; - public: - typedef Block BlockType; - enum { IsRowMajor = internal::traits::IsRowMajor }; - EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType) - - /** Column or Row constructor - */ - inline BlockImpl(const XprType& xpr, int i) - : m_matrix(xpr), - m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), - m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0), - m_blockRows(BlockRows==1 ? 1 : xpr.rows()), - m_blockCols(BlockCols==1 ? 1 : xpr.cols()) - {} - - /** Dynamic-size constructor - */ - inline BlockImpl(const XprType& xpr, int startRow, int startCol, int blockRows, int blockCols) - : m_matrix(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(blockRows), m_blockCols(blockCols) - {} - - inline int rows() const { return m_blockRows.value(); } - inline int cols() const { return m_blockCols.value(); } - - inline Scalar& coeffRef(int row, int col) - { - return m_matrix.const_cast_derived() - .coeffRef(row + m_startRow.value(), col + m_startCol.value()); - } - - inline const Scalar coeff(int row, int col) const - { - return m_matrix.coeff(row + m_startRow.value(), col + m_startCol.value()); - } - - inline Scalar& coeffRef(int index) - { - return m_matrix.const_cast_derived() - .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), - m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); - } - - inline const Scalar coeff(int index) const - { - return m_matrix - .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), - m_startCol.value() + (RowsAtCompileTime == 1 ? 
index : 0)); - } - - inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; } - - typedef internal::GenericSparseBlockInnerIteratorImpl InnerIterator; - - class ReverseInnerIterator : public _MatrixTypeNested::ReverseInnerIterator - { - typedef typename _MatrixTypeNested::ReverseInnerIterator Base; - const BlockType& m_block; - Index m_begin; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const BlockType& block, Index outer) - : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())), - m_block(block), - m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - { - while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) ) - Base::operator--(); - } - - inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); } - inline Index outer() const { return Base::outer() - (IsRowMajor ? 
m_block.m_startRow.value() : m_block.m_startCol.value()); } - inline Index row() const { return Base::row() - m_block.m_startRow.value(); } - inline Index col() const { return Base::col() - m_block.m_startCol.value(); } - - inline operator bool() const { return Base::operator bool() && Base::index() >= m_begin; } - }; - protected: - friend class internal::GenericSparseBlockInnerIteratorImpl; - friend class ReverseInnerIterator; - - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - - typename XprType::Nested m_matrix; - const internal::variable_if_dynamic m_startRow; - const internal::variable_if_dynamic m_startCol; - const internal::variable_if_dynamic m_blockRows; - const internal::variable_if_dynamic m_blockCols; - private: - Index nonZeros() const; -}; - -namespace internal { - template - class GenericSparseBlockInnerIteratorImpl : public internal::remove_all::type::InnerIterator - { - public: - typedef Block BlockType; - enum { - IsRowMajor = BlockType::IsRowMajor - }; - typedef typename BlockType::Index Index; - protected: - typedef typename internal::remove_all::type _MatrixTypeNested; - typedef typename _MatrixTypeNested::InnerIterator Base; - const BlockType& m_block; - Index m_end; - public: - - EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer) - : Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())), - m_block(block), - m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value()) - { - while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) ) - Base::operator++(); - } - - inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); } - inline Index outer() const { return Base::outer() - (IsRowMajor ? 
m_block.m_startRow.value() : m_block.m_startCol.value()); } - inline Index row() const { return Base::row() - m_block.m_startRow.value(); } - inline Index col() const { return Base::col() - m_block.m_startCol.value(); } - - inline operator bool() const { return Base::operator bool() && Base::index() < m_end; } - }; - - // Row vector of a column-major sparse matrix or column of a row-major one. - template - class GenericSparseBlockInnerIteratorImpl - { - public: - typedef Block BlockType; - enum { - IsRowMajor = BlockType::IsRowMajor - }; - typedef typename BlockType::Index Index; - typedef typename BlockType::Scalar Scalar; - protected: - typedef typename internal::remove_all::type _MatrixTypeNested; - const BlockType& m_block; - Index m_outerPos; - Index m_innerIndex; - Scalar m_value; - Index m_end; - public: - - EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0) - : - m_block(block), - m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry - m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()), - m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value()) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - - ++(*this); - } - - inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); } - inline Index outer() const { return 0; } - inline Index row() const { return IsRowMajor ? 0 : index(); } - inline Index col() const { return IsRowMajor ? index() : 0; } - - inline Scalar value() const { return m_value; } - - inline GenericSparseBlockInnerIteratorImpl& operator++() - { - // search next non-zero entry - while(m_outerPos -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -/* - - * NOTE: This file is the modified version of sp_coletree.c file in SuperLU - - * -- SuperLU routine (version 3.1) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * August 1, 2008 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSE_COLETREE_H -#define SPARSE_COLETREE_H - -namespace Eigen { - -namespace internal { - -/** Find the root of the tree/set containing the vertex i : Use Path halving */ -template -Index etree_find (Index i, IndexVector& pp) -{ - Index p = pp(i); // Parent - Index gp = pp(p); // Grand parent - while (gp != p) - { - pp(i) = gp; // Parent pointer on find path is changed to former grand parent - i = gp; - p = pp(i); - gp = pp(p); - } - return p; -} - -/** Compute the column elimination tree of a sparse matrix - * \param mat The matrix in column-major format. 
- * \param parent The elimination tree - * \param firstRowElt The column index of the first element in each row - * \param perm The permutation to apply to the column of \b mat - */ -template -int coletree(const MatrixType& mat, IndexVector& parent, IndexVector& firstRowElt, typename MatrixType::Index *perm=0) -{ - typedef typename MatrixType::Index Index; - Index nc = mat.cols(); // Number of columns - Index m = mat.rows(); - Index diagSize = (std::min)(nc,m); - IndexVector root(nc); // root of subtree of etree - root.setZero(); - IndexVector pp(nc); // disjoint sets - pp.setZero(); // Initialize disjoint sets - parent.resize(mat.cols()); - //Compute first nonzero column in each row - Index row,col; - firstRowElt.resize(m); - firstRowElt.setConstant(nc); - firstRowElt.segment(0, diagSize).setLinSpaced(diagSize, 0, diagSize-1); - bool found_diag; - for (col = 0; col < nc; col++) - { - Index pcol = col; - if(perm) pcol = perm[col]; - for (typename MatrixType::InnerIterator it(mat, pcol); it; ++it) - { - row = it.row(); - firstRowElt(row) = (std::min)(firstRowElt(row), col); - } - } - /* Compute etree by Liu's algorithm for symmetric matrices, - except use (firstRowElt[r],c) in place of an edge (r,c) of A. - Thus each row clique in A'*A is replaced by a star - centered at its first vertex, which has the same fill. 
*/ - Index rset, cset, rroot; - for (col = 0; col < nc; col++) - { - found_diag = col>=m; - pp(col) = col; - cset = col; - root(cset) = col; - parent(col) = nc; - /* The diagonal element is treated here even if it does not exist in the matrix - * hence the loop is executed once more */ - Index pcol = col; - if(perm) pcol = perm[col]; - for (typename MatrixType::InnerIterator it(mat, pcol); it||!found_diag; ++it) - { // A sequence of interleaved find and union is performed - Index i = col; - if(it) i = it.index(); - if (i == col) found_diag = true; - - row = firstRowElt(i); - if (row >= col) continue; - rset = internal::etree_find(row, pp); // Find the name of the set containing row - rroot = root(rset); - if (rroot != col) - { - parent(rroot) = col; - pp(cset) = rset; - cset = rset; - root(cset) = col; - } - } - } - return 0; -} - -/** - * Depth-first search from vertex n. No recursion. - * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. -*/ -template -void nr_etdfs (Index n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, Index postnum) -{ - Index current = n, first, next; - while (postnum != n) - { - // No kid for the current node - first = first_kid(current); - - // no kid for the current node - if (first == -1) - { - // Numbering this node because it has no kid - post(current) = postnum++; - - // looking for the next kid - next = next_kid(current); - while (next == -1) - { - // No more kids : back to the parent node - current = parent(current); - // numbering the parent node - post(current) = postnum++; - - // Get the next kid - next = next_kid(current); - } - // stopping criterion - if (postnum == n+1) return; - - // Updating current node - current = next; - } - else - { - current = first; - } - } -} - - -/** - * \brief Post order a tree - * \param n the number of nodes - * \param parent Input tree - * \param post postordered tree - */ -template -void treePostorder(Index n, IndexVector& 
parent, IndexVector& post) -{ - IndexVector first_kid, next_kid; // Linked list of children - Index postnum; - // Allocate storage for working arrays and results - first_kid.resize(n+1); - next_kid.setZero(n+1); - post.setZero(n+1); - - // Set up structure describing children - Index v, dad; - first_kid.setConstant(-1); - for (v = n-1; v >= 0; v--) - { - dad = parent(v); - next_kid(v) = first_kid(dad); - first_kid(dad) = v; - } - - // Depth-first search from dummy root vertex #n - postnum = 0; - internal::nr_etdfs(n, parent, first_kid, next_kid, post, postnum); -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // SPARSE_COLETREE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index 546273759..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -1,324 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSE_CWISE_BINARY_OP_H -#define EIGEN_SPARSE_CWISE_BINARY_OP_H - -namespace Eigen { - -// Here we have to handle 3 cases: -// 1 - sparse op dense -// 2 - dense op sparse -// 3 - sparse op sparse -// We also need to implement a 4th iterator for: -// 4 - dense op dense -// Finally, we also need to distinguish between the product and other operations : -// configuration returned mode -// 1 - sparse op dense product sparse -// generic dense -// 2 - dense op sparse product sparse -// generic dense -// 3 - sparse op sparse product sparse -// generic sparse -// 4 - dense op dense product dense -// generic dense - -namespace internal { - -template<> struct promote_storage_type -{ typedef Sparse ret; }; - -template<> struct promote_storage_type -{ typedef Sparse ret; }; - -template::StorageKind, - typename _RhsStorageMode = typename traits::StorageKind> -class sparse_cwise_binary_op_inner_iterator_selector; - -} // end namespace internal - -template -class CwiseBinaryOpImpl - : public SparseMatrixBase > -{ - public: - class InnerIterator; - class ReverseInnerIterator; - typedef CwiseBinaryOp Derived; - EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) - CwiseBinaryOpImpl() - { - EIGEN_STATIC_ASSERT(( - (!internal::is_same::StorageKind, - typename internal::traits::StorageKind>::value) - || ((Lhs::Flags&RowMajorBit) == (Rhs::Flags&RowMajorBit))), - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH); - } -}; - -template -class CwiseBinaryOpImpl::InnerIterator - : public internal::sparse_cwise_binary_op_inner_iterator_selector::InnerIterator> -{ - public: - typedef typename Lhs::Index Index; - typedef internal::sparse_cwise_binary_op_inner_iterator_selector< - BinaryOp,Lhs,Rhs, InnerIterator> Base; - - // NOTE: we have to prefix Index by "typename Lhs::" to avoid an ICE with VC11 - EIGEN_STRONG_INLINE InnerIterator(const CwiseBinaryOpImpl& binOp, typename Lhs::Index outer) - : Base(binOp.derived(),outer) - {} -}; - 
-/*************************************************************************** -* Implementation of inner-iterators -***************************************************************************/ - -// template struct internal::func_is_conjunction { enum { ret = false }; }; -// template struct internal::func_is_conjunction > { enum { ret = true }; }; - -// TODO generalize the internal::scalar_product_op specialization to all conjunctions if any ! - -namespace internal { - -// sparse - sparse (generic) -template -class sparse_cwise_binary_op_inner_iterator_selector -{ - typedef CwiseBinaryOp CwiseBinaryXpr; - typedef typename traits::Scalar Scalar; - typedef typename traits::_LhsNested _LhsNested; - typedef typename traits::_RhsNested _RhsNested; - typedef typename _LhsNested::InnerIterator LhsIterator; - typedef typename _RhsNested::InnerIterator RhsIterator; - typedef typename Lhs::Index Index; - - public: - - EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()) - { - this->operator++(); - } - - EIGEN_STRONG_INLINE Derived& operator++() - { - if (m_lhsIter && m_rhsIter && (m_lhsIter.index() == m_rhsIter.index())) - { - m_id = m_lhsIter.index(); - m_value = m_functor(m_lhsIter.value(), m_rhsIter.value()); - ++m_lhsIter; - ++m_rhsIter; - } - else if (m_lhsIter && (!m_rhsIter || (m_lhsIter.index() < m_rhsIter.index()))) - { - m_id = m_lhsIter.index(); - m_value = m_functor(m_lhsIter.value(), Scalar(0)); - ++m_lhsIter; - } - else if (m_rhsIter && (!m_lhsIter || (m_lhsIter.index() > m_rhsIter.index()))) - { - m_id = m_rhsIter.index(); - m_value = m_functor(Scalar(0), m_rhsIter.value()); - ++m_rhsIter; - } - else - { - m_value = 0; // this is to avoid a compilation warning - m_id = -1; - } - return *static_cast(this); - } - - EIGEN_STRONG_INLINE Scalar value() const { return m_value; } - - EIGEN_STRONG_INLINE Index index() const { 
return m_id; } - EIGEN_STRONG_INLINE Index row() const { return Lhs::IsRowMajor ? m_lhsIter.row() : index(); } - EIGEN_STRONG_INLINE Index col() const { return Lhs::IsRowMajor ? index() : m_lhsIter.col(); } - - EIGEN_STRONG_INLINE operator bool() const { return m_id>=0; } - - protected: - LhsIterator m_lhsIter; - RhsIterator m_rhsIter; - const BinaryOp& m_functor; - Scalar m_value; - Index m_id; -}; - -// sparse - sparse (product) -template -class sparse_cwise_binary_op_inner_iterator_selector, Lhs, Rhs, Derived, Sparse, Sparse> -{ - typedef scalar_product_op BinaryFunc; - typedef CwiseBinaryOp CwiseBinaryXpr; - typedef typename CwiseBinaryXpr::Scalar Scalar; - typedef typename traits::_LhsNested _LhsNested; - typedef typename _LhsNested::InnerIterator LhsIterator; - typedef typename traits::_RhsNested _RhsNested; - typedef typename _RhsNested::InnerIterator RhsIterator; - typedef typename Lhs::Index Index; - public: - - EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_lhsIter(xpr.lhs(),outer), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()) - { - while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index())) - { - if (m_lhsIter.index() < m_rhsIter.index()) - ++m_lhsIter; - else - ++m_rhsIter; - } - } - - EIGEN_STRONG_INLINE Derived& operator++() - { - ++m_lhsIter; - ++m_rhsIter; - while (m_lhsIter && m_rhsIter && (m_lhsIter.index() != m_rhsIter.index())) - { - if (m_lhsIter.index() < m_rhsIter.index()) - ++m_lhsIter; - else - ++m_rhsIter; - } - return *static_cast(this); - } - - EIGEN_STRONG_INLINE Scalar value() const { return m_functor(m_lhsIter.value(), m_rhsIter.value()); } - - EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); } - EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); } - EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } - - EIGEN_STRONG_INLINE operator bool() const { return (m_lhsIter && m_rhsIter); } - - protected: - 
LhsIterator m_lhsIter; - RhsIterator m_rhsIter; - const BinaryFunc& m_functor; -}; - -// sparse - dense (product) -template -class sparse_cwise_binary_op_inner_iterator_selector, Lhs, Rhs, Derived, Sparse, Dense> -{ - typedef scalar_product_op BinaryFunc; - typedef CwiseBinaryOp CwiseBinaryXpr; - typedef typename CwiseBinaryXpr::Scalar Scalar; - typedef typename traits::_LhsNested _LhsNested; - typedef typename traits::RhsNested RhsNested; - typedef typename _LhsNested::InnerIterator LhsIterator; - typedef typename Lhs::Index Index; - enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit }; - public: - - EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_rhs(xpr.rhs()), m_lhsIter(xpr.lhs(),outer), m_functor(xpr.functor()), m_outer(outer) - {} - - EIGEN_STRONG_INLINE Derived& operator++() - { - ++m_lhsIter; - return *static_cast(this); - } - - EIGEN_STRONG_INLINE Scalar value() const - { return m_functor(m_lhsIter.value(), - m_rhs.coeff(IsRowMajor?m_outer:m_lhsIter.index(),IsRowMajor?m_lhsIter.index():m_outer)); } - - EIGEN_STRONG_INLINE Index index() const { return m_lhsIter.index(); } - EIGEN_STRONG_INLINE Index row() const { return m_lhsIter.row(); } - EIGEN_STRONG_INLINE Index col() const { return m_lhsIter.col(); } - - EIGEN_STRONG_INLINE operator bool() const { return m_lhsIter; } - - protected: - RhsNested m_rhs; - LhsIterator m_lhsIter; - const BinaryFunc m_functor; - const Index m_outer; -}; - -// sparse - dense (product) -template -class sparse_cwise_binary_op_inner_iterator_selector, Lhs, Rhs, Derived, Dense, Sparse> -{ - typedef scalar_product_op BinaryFunc; - typedef CwiseBinaryOp CwiseBinaryXpr; - typedef typename CwiseBinaryXpr::Scalar Scalar; - typedef typename traits::_RhsNested _RhsNested; - typedef typename _RhsNested::InnerIterator RhsIterator; - typedef typename Lhs::Index Index; - - enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit }; - public: - - 
EIGEN_STRONG_INLINE sparse_cwise_binary_op_inner_iterator_selector(const CwiseBinaryXpr& xpr, Index outer) - : m_xpr(xpr), m_rhsIter(xpr.rhs(),outer), m_functor(xpr.functor()), m_outer(outer) - {} - - EIGEN_STRONG_INLINE Derived& operator++() - { - ++m_rhsIter; - return *static_cast(this); - } - - EIGEN_STRONG_INLINE Scalar value() const - { return m_functor(m_xpr.lhs().coeff(IsRowMajor?m_outer:m_rhsIter.index(),IsRowMajor?m_rhsIter.index():m_outer), m_rhsIter.value()); } - - EIGEN_STRONG_INLINE Index index() const { return m_rhsIter.index(); } - EIGEN_STRONG_INLINE Index row() const { return m_rhsIter.row(); } - EIGEN_STRONG_INLINE Index col() const { return m_rhsIter.col(); } - - EIGEN_STRONG_INLINE operator bool() const { return m_rhsIter; } - - protected: - const CwiseBinaryXpr& m_xpr; - RhsIterator m_rhsIter; - const BinaryFunc& m_functor; - const Index m_outer; -}; - -} // end namespace internal - -/*************************************************************************** -* Implementation of SparseMatrixBase and SparseCwise functions/operators -***************************************************************************/ - -template -template -EIGEN_STRONG_INLINE Derived & -SparseMatrixBase::operator-=(const SparseMatrixBase &other) -{ - return derived() = derived() - other.derived(); -} - -template -template -EIGEN_STRONG_INLINE Derived & -SparseMatrixBase::operator+=(const SparseMatrixBase& other) -{ - return derived() = derived() + other.derived(); -} - -template -template -EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type -SparseMatrixBase::cwiseProduct(const MatrixBase &other) const -{ - return typename CwiseProductDenseReturnType::Type(derived(), other.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_CWISE_BINARY_OP_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseUnaryOp.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseUnaryOp.h index 
5a50c7803..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseCwiseUnaryOp.h @@ -1,163 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_CWISE_UNARY_OP_H -#define EIGEN_SPARSE_CWISE_UNARY_OP_H - -namespace Eigen { - -template -class CwiseUnaryOpImpl - : public SparseMatrixBase > -{ - public: - - class InnerIterator; - class ReverseInnerIterator; - - typedef CwiseUnaryOp Derived; - EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) - - protected: - typedef typename internal::traits::_XprTypeNested _MatrixTypeNested; - typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator; - typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator; -}; - -template -class CwiseUnaryOpImpl::InnerIterator - : public CwiseUnaryOpImpl::MatrixTypeIterator -{ - typedef typename CwiseUnaryOpImpl::Scalar Scalar; - typedef typename CwiseUnaryOpImpl::MatrixTypeIterator Base; - public: - - EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) - {} - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { Base::operator++(); return *this; } - - EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); } - - protected: - const UnaryOp m_functor; - private: - typename CwiseUnaryOpImpl::Scalar& valueRef(); -}; - -template -class CwiseUnaryOpImpl::ReverseInnerIterator - : public CwiseUnaryOpImpl::MatrixTypeReverseIterator -{ - typedef typename CwiseUnaryOpImpl::Scalar 
Scalar; - typedef typename CwiseUnaryOpImpl::MatrixTypeReverseIterator Base; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryOpImpl& unaryOp, typename CwiseUnaryOpImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) - {} - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - EIGEN_STRONG_INLINE typename CwiseUnaryOpImpl::Scalar value() const { return m_functor(Base::value()); } - - protected: - const UnaryOp m_functor; - private: - typename CwiseUnaryOpImpl::Scalar& valueRef(); -}; - -template -class CwiseUnaryViewImpl - : public SparseMatrixBase > -{ - public: - - class InnerIterator; - class ReverseInnerIterator; - - typedef CwiseUnaryView Derived; - EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) - - protected: - typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; - typedef typename _MatrixTypeNested::InnerIterator MatrixTypeIterator; - typedef typename _MatrixTypeNested::ReverseInnerIterator MatrixTypeReverseIterator; -}; - -template -class CwiseUnaryViewImpl::InnerIterator - : public CwiseUnaryViewImpl::MatrixTypeIterator -{ - typedef typename CwiseUnaryViewImpl::Scalar Scalar; - typedef typename CwiseUnaryViewImpl::MatrixTypeIterator Base; - public: - - EIGEN_STRONG_INLINE InnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) - {} - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { Base::operator++(); return *this; } - - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); } - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); } - - protected: - const ViewOp m_functor; -}; - -template -class CwiseUnaryViewImpl::ReverseInnerIterator - : public 
CwiseUnaryViewImpl::MatrixTypeReverseIterator -{ - typedef typename CwiseUnaryViewImpl::Scalar Scalar; - typedef typename CwiseUnaryViewImpl::MatrixTypeReverseIterator Base; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const CwiseUnaryViewImpl& unaryOp, typename CwiseUnaryViewImpl::Index outer) - : Base(unaryOp.derived().nestedExpression(),outer), m_functor(unaryOp.derived().functor()) - {} - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar value() const { return m_functor(Base::value()); } - EIGEN_STRONG_INLINE typename CwiseUnaryViewImpl::Scalar& valueRef() { return m_functor(Base::valueRef()); } - - protected: - const ViewOp m_functor; -}; - -template -EIGEN_STRONG_INLINE Derived& -SparseMatrixBase::operator*=(const Scalar& other) -{ - for (Index j=0; j -EIGEN_STRONG_INLINE Derived& -SparseMatrixBase::operator/=(const Scalar& other) -{ - for (Index j=0; j -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSEDENSEPRODUCT_H -#define EIGEN_SPARSEDENSEPRODUCT_H - -namespace Eigen { - -template struct SparseDenseProductReturnType -{ - typedef SparseTimeDenseProduct Type; -}; - -template struct SparseDenseProductReturnType -{ - typedef typename internal::conditional< - Lhs::IsRowMajor, - SparseDenseOuterProduct, - SparseDenseOuterProduct >::type Type; -}; - -template struct DenseSparseProductReturnType -{ - typedef DenseTimeSparseProduct Type; -}; - -template struct DenseSparseProductReturnType -{ - typedef typename internal::conditional< - Rhs::IsRowMajor, - SparseDenseOuterProduct, - SparseDenseOuterProduct >::type Type; -}; - -namespace internal { - -template -struct traits > -{ - typedef Sparse StorageKind; - typedef typename scalar_product_traits::Scalar, - typename traits::Scalar>::ReturnType Scalar; - typedef typename Lhs::Index Index; - typedef typename Lhs::Nested LhsNested; - typedef typename Rhs::Nested RhsNested; - typedef typename remove_all::type _LhsNested; - typedef typename remove_all::type _RhsNested; - - enum { - LhsCoeffReadCost = traits<_LhsNested>::CoeffReadCost, - RhsCoeffReadCost = traits<_RhsNested>::CoeffReadCost, - - RowsAtCompileTime = Tr ? int(traits::RowsAtCompileTime) : int(traits::RowsAtCompileTime), - ColsAtCompileTime = Tr ? int(traits::ColsAtCompileTime) : int(traits::ColsAtCompileTime), - MaxRowsAtCompileTime = Tr ? int(traits::MaxRowsAtCompileTime) : int(traits::MaxRowsAtCompileTime), - MaxColsAtCompileTime = Tr ? int(traits::MaxColsAtCompileTime) : int(traits::MaxColsAtCompileTime), - - Flags = Tr ? 
RowMajorBit : 0, - - CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + NumTraits::MulCost - }; -}; - -} // end namespace internal - -template -class SparseDenseOuterProduct - : public SparseMatrixBase > -{ - public: - - typedef SparseMatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(SparseDenseOuterProduct) - typedef internal::traits Traits; - - private: - - typedef typename Traits::LhsNested LhsNested; - typedef typename Traits::RhsNested RhsNested; - typedef typename Traits::_LhsNested _LhsNested; - typedef typename Traits::_RhsNested _RhsNested; - - public: - - class InnerIterator; - - EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - EIGEN_STATIC_ASSERT(!Tr,YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - EIGEN_STRONG_INLINE SparseDenseOuterProduct(const Rhs& rhs, const Lhs& lhs) - : m_lhs(lhs), m_rhs(rhs) - { - EIGEN_STATIC_ASSERT(Tr,YOU_MADE_A_PROGRAMMING_MISTAKE); - } - - EIGEN_STRONG_INLINE Index rows() const { return Tr ? m_rhs.rows() : m_lhs.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return Tr ? m_lhs.cols() : m_rhs.cols(); } - - EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } - - protected: - LhsNested m_lhs; - RhsNested m_rhs; -}; - -template -class SparseDenseOuterProduct::InnerIterator : public _LhsNested::InnerIterator -{ - typedef typename _LhsNested::InnerIterator Base; - typedef typename SparseDenseOuterProduct::Index Index; - public: - EIGEN_STRONG_INLINE InnerIterator(const SparseDenseOuterProduct& prod, Index outer) - : Base(prod.lhs(), 0), m_outer(outer), m_factor(get(prod.rhs(), outer, typename internal::traits::StorageKind() )) - { } - - inline Index outer() const { return m_outer; } - inline Index row() const { return Transpose ? m_outer : Base::index(); } - inline Index col() const { return Transpose ? 
Base::index() : m_outer; } - - inline Scalar value() const { return Base::value() * m_factor; } - - protected: - static Scalar get(const _RhsNested &rhs, Index outer, Dense = Dense()) - { - return rhs.coeff(outer); - } - - static Scalar get(const _RhsNested &rhs, Index outer, Sparse = Sparse()) - { - typename Traits::_RhsNested::InnerIterator it(rhs, outer); - if (it && it.index()==0) - return it.value(); - - return Scalar(0); - } - - Index m_outer; - Scalar m_factor; -}; - -namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{ - typedef Dense StorageKind; - typedef MatrixXpr XprKind; -}; - -template -struct sparse_time_dense_product_impl; - -template -struct sparse_time_dense_product_impl -{ - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; - typedef typename Lhs::Index Index; - typedef typename Lhs::InnerIterator LhsInnerIterator; - static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) - { - for(Index c=0; c -struct sparse_time_dense_product_impl -{ - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; - typedef typename Lhs::InnerIterator LhsInnerIterator; - typedef typename Lhs::Index Index; - static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) - { - for(Index c=0; c -struct sparse_time_dense_product_impl -{ - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; - typedef typename Lhs::InnerIterator LhsInnerIterator; - typedef typename Lhs::Index Index; - static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) - { - for(Index j=0; j -struct 
sparse_time_dense_product_impl -{ - typedef typename internal::remove_all::type Lhs; - typedef typename internal::remove_all::type Rhs; - typedef typename internal::remove_all::type Res; - typedef typename Lhs::InnerIterator LhsInnerIterator; - typedef typename Lhs::Index Index; - static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) - { - for(Index j=0; j -inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) -{ - sparse_time_dense_product_impl::run(lhs, rhs, res, alpha); -} - -} // end namespace internal - -template -class SparseTimeDenseProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseTimeDenseProduct) - - SparseTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::sparse_time_dense_product(m_lhs, m_rhs, dest, alpha); - } - - private: - SparseTimeDenseProduct& operator=(const SparseTimeDenseProduct&); -}; - - -// dense = dense * sparse -namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{ - typedef Dense StorageKind; -}; -} // end namespace internal - -template -class DenseTimeSparseProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseProduct) - - DenseTimeSparseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - Transpose lhs_t(m_lhs); - Transpose rhs_t(m_rhs); - Transpose dest_t(dest); - internal::sparse_time_dense_product(rhs_t, lhs_t, dest_t, alpha); - } - - private: - DenseTimeSparseProduct& operator=(const DenseTimeSparseProduct&); -}; - -} // end namespace Eigen - -#endif // EIGEN_SPARSEDENSEPRODUCT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDiagonalProduct.h 
b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDiagonalProduct.h index 1bb590e64..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -1,196 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_DIAGONAL_PRODUCT_H -#define EIGEN_SPARSE_DIAGONAL_PRODUCT_H - -namespace Eigen { - -// The product of a diagonal matrix with a sparse matrix can be easily -// implemented using expression template. -// We have two consider very different cases: -// 1 - diag * row-major sparse -// => each inner vector <=> scalar * sparse vector product -// => so we can reuse CwiseUnaryOp::InnerIterator -// 2 - diag * col-major sparse -// => each inner vector <=> densevector * sparse vector cwise product -// => again, we can reuse specialization of CwiseBinaryOp::InnerIterator -// for that particular case -// The two other cases are symmetric. - -namespace internal { - -template -struct traits > -{ - typedef typename remove_all::type _Lhs; - typedef typename remove_all::type _Rhs; - typedef typename _Lhs::Scalar Scalar; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - typedef Sparse StorageKind; - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = _Lhs::RowsAtCompileTime, - ColsAtCompileTime = _Rhs::ColsAtCompileTime, - - MaxRowsAtCompileTime = _Lhs::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _Rhs::MaxColsAtCompileTime, - - SparseFlags = is_diagonal<_Lhs>::ret ? 
int(_Rhs::Flags) : int(_Lhs::Flags), - Flags = (SparseFlags&RowMajorBit), - CoeffReadCost = Dynamic - }; -}; - -enum {SDP_IsDiagonal, SDP_IsSparseRowMajor, SDP_IsSparseColMajor}; -template -class sparse_diagonal_product_inner_iterator_selector; - -} // end namespace internal - -template -class SparseDiagonalProduct - : public SparseMatrixBase >, - internal::no_assignment_operator -{ - typedef typename Lhs::Nested LhsNested; - typedef typename Rhs::Nested RhsNested; - - typedef typename internal::remove_all::type _LhsNested; - typedef typename internal::remove_all::type _RhsNested; - - enum { - LhsMode = internal::is_diagonal<_LhsNested>::ret ? internal::SDP_IsDiagonal - : (_LhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor, - RhsMode = internal::is_diagonal<_RhsNested>::ret ? internal::SDP_IsDiagonal - : (_RhsNested::Flags&RowMajorBit) ? internal::SDP_IsSparseRowMajor : internal::SDP_IsSparseColMajor - }; - - public: - - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseDiagonalProduct) - - typedef internal::sparse_diagonal_product_inner_iterator_selector - <_LhsNested,_RhsNested,SparseDiagonalProduct,LhsMode,RhsMode> InnerIterator; - - // We do not want ReverseInnerIterator for diagonal-sparse products, - // but this dummy declaration is needed to make diag * sparse * diag compile. 
- class ReverseInnerIterator; - - EIGEN_STRONG_INLINE SparseDiagonalProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs) - { - eigen_assert(lhs.cols() == rhs.rows() && "invalid sparse matrix * diagonal matrix product"); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } - - EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } - - protected: - LhsNested m_lhs; - RhsNested m_rhs; -}; - -namespace internal { - -template -class sparse_diagonal_product_inner_iterator_selector - - : public CwiseUnaryOp,const Rhs>::InnerIterator -{ - typedef typename CwiseUnaryOp,const Rhs>::InnerIterator Base; - typedef typename Lhs::Index Index; - public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.rhs()*(expr.lhs().diagonal().coeff(outer)), outer) - {} -}; - -template -class sparse_diagonal_product_inner_iterator_selector - - : public CwiseBinaryOp< - scalar_product_op, - const typename Rhs::ConstInnerVectorReturnType, - const typename Lhs::DiagonalVectorType>::InnerIterator -{ - typedef typename CwiseBinaryOp< - scalar_product_op, - const typename Rhs::ConstInnerVectorReturnType, - const typename Lhs::DiagonalVectorType>::InnerIterator Base; - typedef typename Lhs::Index Index; - Index m_outer; - public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.rhs().innerVector(outer) .cwiseProduct(expr.lhs().diagonal()), 0), m_outer(outer) - {} - - inline Index outer() const { return m_outer; } - inline Index col() const { return m_outer; } -}; - -template -class sparse_diagonal_product_inner_iterator_selector - - : public CwiseUnaryOp,const Lhs>::InnerIterator -{ - typedef typename CwiseUnaryOp,const Lhs>::InnerIterator Base; - typedef typename 
Lhs::Index Index; - public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.lhs()*expr.rhs().diagonal().coeff(outer), outer) - {} -}; - -template -class sparse_diagonal_product_inner_iterator_selector - - : public CwiseBinaryOp< - scalar_product_op, - const typename Lhs::ConstInnerVectorReturnType, - const Transpose >::InnerIterator -{ - typedef typename CwiseBinaryOp< - scalar_product_op, - const typename Lhs::ConstInnerVectorReturnType, - const Transpose >::InnerIterator Base; - typedef typename Lhs::Index Index; - Index m_outer; - public: - inline sparse_diagonal_product_inner_iterator_selector( - const SparseDiagonalProductType& expr, Index outer) - : Base(expr.lhs().innerVector(outer) .cwiseProduct(expr.rhs().diagonal().transpose()), 0), m_outer(outer) - {} - - inline Index outer() const { return m_outer; } - inline Index row() const { return m_outer; } -}; - -} // end namespace internal - -// SparseMatrixBase functions - -template -template -const SparseDiagonalProduct -SparseMatrixBase::operator*(const DiagonalBase &other) const -{ - return SparseDiagonalProduct(this->derived(), other.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_DIAGONAL_PRODUCT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDot.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDot.h index db39c9aec..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDot.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseDot.h @@ -1,101 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSE_DOT_H -#define EIGEN_SPARSE_DOT_H - -namespace Eigen { - -template -template -typename internal::traits::Scalar -SparseMatrixBase::dot(const MatrixBase& other) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - eigen_assert(size() == other.size()); - eigen_assert(other.size()>0 && "you are using a non initialized vector"); - - typename Derived::InnerIterator i(derived(),0); - Scalar res(0); - while (i) - { - res += numext::conj(i.value()) * other.coeff(i.index()); - ++i; - } - return res; -} - -template -template -typename internal::traits::Scalar -SparseMatrixBase::dot(const SparseMatrixBase& other) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - eigen_assert(size() == other.size()); - - typedef typename Derived::Nested Nested; - typedef typename OtherDerived::Nested OtherNested; - typedef typename internal::remove_all::type NestedCleaned; - typedef typename internal::remove_all::type OtherNestedCleaned; - - Nested nthis(derived()); - OtherNested nother(other.derived()); - - typename NestedCleaned::InnerIterator i(nthis,0); - typename OtherNestedCleaned::InnerIterator j(nother,0); - Scalar res(0); - while (i && j) - { - if (i.index()==j.index()) - { - res += numext::conj(i.value()) * j.value(); - ++i; ++j; - } - else if (i.index() -inline typename NumTraits::Scalar>::Real -SparseMatrixBase::squaredNorm() const -{ - return numext::real((*this).cwiseAbs2().sum()); -} - -template -inline 
typename NumTraits::Scalar>::Real -SparseMatrixBase::norm() const -{ - using std::sqrt; - return sqrt(squaredNorm()); -} - -template -inline typename NumTraits::Scalar>::Real -SparseMatrixBase::blueNorm() const -{ - return internal::blueNorm_impl(*this); -} -} // end namespace Eigen - -#endif // EIGEN_SPARSE_DOT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseFuzzy.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseFuzzy.h index 45f36e9eb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseFuzzy.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseFuzzy.h @@ -1,26 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_FUZZY_H -#define EIGEN_SPARSE_FUZZY_H - -// template -// template -// bool SparseMatrixBase::isApprox( -// const OtherDerived& other, -// typename NumTraits::Real prec -// ) const -// { -// const typename internal::nested::type nested(derived()); -// const typename internal::nested::type otherNested(other.derived()); -// return (nested - otherNested).cwise().abs2().sum() -// <= prec * prec * (std::min)(nested.cwise().abs2().sum(), otherNested.cwise().abs2().sum()); -// } - -#endif // EIGEN_SPARSE_FUZZY_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrix.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrix.h index 3b8946a9c..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrix.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrix.h @@ -1,1262 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSEMATRIX_H -#define EIGEN_SPARSEMATRIX_H - -namespace Eigen { - -/** \ingroup SparseCore_Module - * - * \class SparseMatrix - * - * \brief A versatible sparse matrix representation - * - * This class implements a more versatile variants of the common \em compressed row/column storage format. - * Each colmun's (resp. row) non zeros are stored as a pair of value with associated row (resp. colmiun) index. - * All the non zeros are stored in a single large buffer. Unlike the \em compressed format, there might be extra - * space inbetween the nonzeros of two successive colmuns (resp. rows) such that insertion of new non-zero - * can be done with limited memory reallocation and copies. - * - * A call to the function makeCompressed() turns the matrix into the standard \em compressed format - * compatible with many library. - * - * More details on this storage sceheme are given in the \ref TutorialSparse "manual pages". - * - * \tparam _Scalar the scalar type, i.e. the type of the coefficients - * \tparam _Options Union of bit flags controlling the storage scheme. Currently the only possibility - * is ColMajor or RowMajor. The default is 0 which means column-major. - * \tparam _Index the type of the indices. It has to be a \b signed type (e.g., short, int, std::ptrdiff_t). Default is \c int. - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIX_PLUGIN. 
- */ - -namespace internal { -template -struct traits > -{ - typedef _Scalar Scalar; - typedef _Index Index; - typedef Sparse StorageKind; - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = Dynamic, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = Dynamic, - Flags = _Options | NestByRefBit | LvalueBit, - CoeffReadCost = NumTraits::ReadCost, - SupportedAccessPatterns = InnerRandomAccessPattern - }; -}; - -template -struct traits, DiagIndex> > -{ - typedef SparseMatrix<_Scalar, _Options, _Index> MatrixType; - typedef typename nested::type MatrixTypeNested; - typedef typename remove_reference::type _MatrixTypeNested; - - typedef _Scalar Scalar; - typedef Dense StorageKind; - typedef _Index Index; - typedef MatrixXpr XprKind; - - enum { - RowsAtCompileTime = Dynamic, - ColsAtCompileTime = 1, - MaxRowsAtCompileTime = Dynamic, - MaxColsAtCompileTime = 1, - Flags = 0, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost*10 - }; -}; - -} // end namespace internal - -template -class SparseMatrix - : public SparseMatrixBase > -{ - public: - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseMatrix) - EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, +=) - EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseMatrix, -=) - - typedef MappedSparseMatrix Map; - using Base::IsRowMajor; - typedef internal::CompressedStorage Storage; - enum { - Options = _Options - }; - - protected: - - typedef SparseMatrix TransposedSparseMatrix; - - Index m_outerSize; - Index m_innerSize; - Index* m_outerIndex; - Index* m_innerNonZeros; // optional, if null then the data is compressed - Storage m_data; - - Eigen::Map > innerNonZeros() { return Eigen::Map >(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); } - const Eigen::Map > innerNonZeros() const { return Eigen::Map >(m_innerNonZeros, m_innerNonZeros?m_outerSize:0); } - - public: - - /** \returns whether \c *this is in compressed form. 
*/ - inline bool isCompressed() const { return m_innerNonZeros==0; } - - /** \returns the number of rows of the matrix */ - inline Index rows() const { return IsRowMajor ? m_outerSize : m_innerSize; } - /** \returns the number of columns of the matrix */ - inline Index cols() const { return IsRowMajor ? m_innerSize : m_outerSize; } - - /** \returns the number of rows (resp. columns) of the matrix if the storage order column major (resp. row major) */ - inline Index innerSize() const { return m_innerSize; } - /** \returns the number of columns (resp. rows) of the matrix if the storage order column major (resp. row major) */ - inline Index outerSize() const { return m_outerSize; } - - /** \returns a const pointer to the array of values. - * This function is aimed at interoperability with other libraries. - * \sa innerIndexPtr(), outerIndexPtr() */ - inline const Scalar* valuePtr() const { return m_data.valuePtr(); } - /** \returns a non-const pointer to the array of values. - * This function is aimed at interoperability with other libraries. - * \sa innerIndexPtr(), outerIndexPtr() */ - inline Scalar* valuePtr() { return m_data.valuePtr(); } - - /** \returns a const pointer to the array of inner indices. - * This function is aimed at interoperability with other libraries. - * \sa valuePtr(), outerIndexPtr() */ - inline const Index* innerIndexPtr() const { return m_data.indexPtr(); } - /** \returns a non-const pointer to the array of inner indices. - * This function is aimed at interoperability with other libraries. - * \sa valuePtr(), outerIndexPtr() */ - inline Index* innerIndexPtr() { return m_data.indexPtr(); } - - /** \returns a const pointer to the array of the starting positions of the inner vectors. - * This function is aimed at interoperability with other libraries. 
- * \sa valuePtr(), innerIndexPtr() */ - inline const Index* outerIndexPtr() const { return m_outerIndex; } - /** \returns a non-const pointer to the array of the starting positions of the inner vectors. - * This function is aimed at interoperability with other libraries. - * \sa valuePtr(), innerIndexPtr() */ - inline Index* outerIndexPtr() { return m_outerIndex; } - - /** \returns a const pointer to the array of the number of non zeros of the inner vectors. - * This function is aimed at interoperability with other libraries. - * \warning it returns the null pointer 0 in compressed mode */ - inline const Index* innerNonZeroPtr() const { return m_innerNonZeros; } - /** \returns a non-const pointer to the array of the number of non zeros of the inner vectors. - * This function is aimed at interoperability with other libraries. - * \warning it returns the null pointer 0 in compressed mode */ - inline Index* innerNonZeroPtr() { return m_innerNonZeros; } - - /** \internal */ - inline Storage& data() { return m_data; } - /** \internal */ - inline const Storage& data() const { return m_data; } - - /** \returns the value of the matrix at position \a i, \a j - * This function returns Scalar(0) if the element is an explicit \em zero */ - inline Scalar coeff(Index row, Index col) const - { - eigen_assert(row>=0 && row=0 && col=0 && row=0 && col=start && "you probably called coeffRef on a non finalized matrix"); - if(end<=start) - return insert(row,col); - const Index p = m_data.searchLowerIndex(start,end-1,inner); - if((p=0 && row=0 && col::Constant(outerSize(), 2)); - } - return insertUncompressed(row,col); - } - - public: - - class InnerIterator; - class ReverseInnerIterator; - - /** Removes all non zeros but keep allocated memory */ - inline void setZero() - { - m_data.clear(); - memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(Index)); - if(m_innerNonZeros) - memset(m_innerNonZeros, 0, (m_outerSize)*sizeof(Index)); - } - - /** \returns the number of non zero coefficients 
*/ - inline Index nonZeros() const - { - if(m_innerNonZeros) - return innerNonZeros().sum(); - return static_cast(m_data.size()); - } - - /** Preallocates \a reserveSize non zeros. - * - * Precondition: the matrix must be in compressed mode. */ - inline void reserve(Index reserveSize) - { - eigen_assert(isCompressed() && "This function does not make sense in non compressed mode."); - m_data.reserve(reserveSize); - } - - #ifdef EIGEN_PARSED_BY_DOXYGEN - /** Preallocates \a reserveSize[\c j] non zeros for each column (resp. row) \c j. - * - * This function turns the matrix in non-compressed mode */ - template - inline void reserve(const SizesType& reserveSizes); - #else - template - inline void reserve(const SizesType& reserveSizes, const typename SizesType::value_type& enableif = typename SizesType::value_type()) - { - EIGEN_UNUSED_VARIABLE(enableif); - reserveInnerVectors(reserveSizes); - } - template - inline void reserve(const SizesType& reserveSizes, const typename SizesType::Scalar& enableif = - #if (!defined(_MSC_VER)) || (_MSC_VER>=1500) // MSVC 2005 fails to compile with this typename - typename - #endif - SizesType::Scalar()) - { - EIGEN_UNUSED_VARIABLE(enableif); - reserveInnerVectors(reserveSizes); - } - #endif // EIGEN_PARSED_BY_DOXYGEN - protected: - template - inline void reserveInnerVectors(const SizesType& reserveSizes) - { - if(isCompressed()) - { - std::size_t totalReserveSize = 0; - // turn the matrix into non-compressed mode - m_innerNonZeros = static_cast(std::malloc(m_outerSize * sizeof(Index))); - if (!m_innerNonZeros) internal::throw_std_bad_alloc(); - - // temporarily use m_innerSizes to hold the new starting points. 
- Index* newOuterIndex = m_innerNonZeros; - - Index count = 0; - for(Index j=0; j=0; --j) - { - Index innerNNZ = previousOuterIndex - m_outerIndex[j]; - for(Index i=innerNNZ-1; i>=0; --i) - { - m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i); - m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i); - } - previousOuterIndex = m_outerIndex[j]; - m_outerIndex[j] = newOuterIndex[j]; - m_innerNonZeros[j] = innerNNZ; - } - m_outerIndex[m_outerSize] = m_outerIndex[m_outerSize-1] + m_innerNonZeros[m_outerSize-1] + reserveSizes[m_outerSize-1]; - - m_data.resize(m_outerIndex[m_outerSize]); - } - else - { - Index* newOuterIndex = static_cast(std::malloc((m_outerSize+1)*sizeof(Index))); - if (!newOuterIndex) internal::throw_std_bad_alloc(); - - Index count = 0; - for(Index j=0; j(reserveSizes[j], alreadyReserved); - count += toReserve + m_innerNonZeros[j]; - } - newOuterIndex[m_outerSize] = count; - - m_data.resize(count); - for(Index j=m_outerSize-1; j>=0; --j) - { - Index offset = newOuterIndex[j] - m_outerIndex[j]; - if(offset>0) - { - Index innerNNZ = m_innerNonZeros[j]; - for(Index i=innerNNZ-1; i>=0; --i) - { - m_data.index(newOuterIndex[j]+i) = m_data.index(m_outerIndex[j]+i); - m_data.value(newOuterIndex[j]+i) = m_data.value(m_outerIndex[j]+i); - } - } - } - - std::swap(m_outerIndex, newOuterIndex); - std::free(newOuterIndex); - } - - } - public: - - //--- low level purely coherent filling --- - - /** \internal - * \returns a reference to the non zero coefficient at position \a row, \a col assuming that: - * - the nonzero does not already exist - * - the new coefficient is the last one according to the storage order - * - * Before filling a given inner vector you must call the statVec(Index) function. - * - * After an insertion session, you should call the finalize() function. 
- * - * \sa insert, insertBackByOuterInner, startVec */ - inline Scalar& insertBack(Index row, Index col) - { - return insertBackByOuterInner(IsRowMajor?row:col, IsRowMajor?col:row); - } - - /** \internal - * \sa insertBack, startVec */ - inline Scalar& insertBackByOuterInner(Index outer, Index inner) - { - eigen_assert(size_t(m_outerIndex[outer+1]) == m_data.size() && "Invalid ordered insertion (invalid outer index)"); - eigen_assert( (m_outerIndex[outer+1]-m_outerIndex[outer]==0 || m_data.index(m_data.size()-1)(m_data.size()); - Index i = m_outerSize; - // find the last filled column - while (i>=0 && m_outerIndex[i]==0) - --i; - ++i; - while (i<=m_outerSize) - { - m_outerIndex[i] = size; - ++i; - } - } - } - - //--- - - template - void setFromTriplets(const InputIterators& begin, const InputIterators& end); - - void sumupDuplicates(); - - //--- - - /** \internal - * same as insert(Index,Index) except that the indices are given relative to the storage order */ - Scalar& insertByOuterInner(Index j, Index i) - { - return insert(IsRowMajor ? j : i, IsRowMajor ? i : j); - } - - /** Turns the matrix into the \em compressed format. - */ - void makeCompressed() - { - if(isCompressed()) - return; - - Index oldStart = m_outerIndex[1]; - m_outerIndex[1] = m_innerNonZeros[0]; - for(Index j=1; j0) - { - for(Index k=0; k(std::malloc(m_outerSize * sizeof(Index))); - for (Index i = 0; i < m_outerSize; i++) - { - m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; - } - } - - /** Suppresses all nonzeros which are \b much \b smaller \b than \a reference under the tolerence \a epsilon */ - void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits::dummy_precision()) - { - prune(default_prunning_func(reference,epsilon)); - } - - /** Turns the matrix into compressed format, and suppresses all nonzeros which do not satisfy the predicate \a keep. 
- * The functor type \a KeepFunc must implement the following function: - * \code - * bool operator() (const Index& row, const Index& col, const Scalar& value) const; - * \endcode - * \sa prune(Scalar,RealScalar) - */ - template - void prune(const KeepFunc& keep = KeepFunc()) - { - // TODO optimize the uncompressed mode to avoid moving and allocating the data twice - // TODO also implement a unit test - makeCompressed(); - - Index k = 0; - for(Index j=0; jrows() == rows && this->cols() == cols) return; - - // If one dimension is null, then there is nothing to be preserved - if(rows==0 || cols==0) return resize(rows,cols); - - Index innerChange = IsRowMajor ? cols - this->cols() : rows - this->rows(); - Index outerChange = IsRowMajor ? rows - this->rows() : cols - this->cols(); - Index newInnerSize = IsRowMajor ? cols : rows; - - // Deals with inner non zeros - if (m_innerNonZeros) - { - // Resize m_innerNonZeros - Index *newInnerNonZeros = static_cast(std::realloc(m_innerNonZeros, (m_outerSize + outerChange) * sizeof(Index))); - if (!newInnerNonZeros) internal::throw_std_bad_alloc(); - m_innerNonZeros = newInnerNonZeros; - - for(Index i=m_outerSize; i(std::malloc((m_outerSize+outerChange+1) * sizeof(Index))); - if (!m_innerNonZeros) internal::throw_std_bad_alloc(); - for(Index i = 0; i < m_outerSize; i++) - m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; - } - - // Change the m_innerNonZeros in case of a decrease of inner size - if (m_innerNonZeros && innerChange < 0) - { - for(Index i = 0; i < m_outerSize + (std::min)(outerChange, Index(0)); i++) - { - Index &n = m_innerNonZeros[i]; - Index start = m_outerIndex[i]; - while (n > 0 && m_data.index(start+n-1) >= newInnerSize) --n; - } - } - - m_innerSize = newInnerSize; - - // Re-allocate outer index structure if necessary - if (outerChange == 0) - return; - - Index *newOuterIndex = static_cast(std::realloc(m_outerIndex, (m_outerSize + outerChange + 1) * sizeof(Index))); - if (!newOuterIndex) 
internal::throw_std_bad_alloc(); - m_outerIndex = newOuterIndex; - if (outerChange > 0) - { - Index last = m_outerSize == 0 ? 0 : m_outerIndex[m_outerSize]; - for(Index i=m_outerSize; i(std::malloc((outerSize + 1) * sizeof(Index))); - if (!m_outerIndex) internal::throw_std_bad_alloc(); - - m_outerSize = outerSize; - } - if(m_innerNonZeros) - { - std::free(m_innerNonZeros); - m_innerNonZeros = 0; - } - memset(m_outerIndex, 0, (m_outerSize+1)*sizeof(Index)); - } - - /** \internal - * Resize the nonzero vector to \a size */ - void resizeNonZeros(Index size) - { - // TODO remove this function - m_data.resize(size); - } - - /** \returns a const expression of the diagonal coefficients */ - const Diagonal diagonal() const { return *this; } - - /** Default constructor yielding an empty \c 0 \c x \c 0 matrix */ - inline SparseMatrix() - : m_outerSize(-1), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) - { - check_template_parameters(); - resize(0, 0); - } - - /** Constructs a \a rows \c x \a cols empty matrix */ - inline SparseMatrix(Index rows, Index cols) - : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) - { - check_template_parameters(); - resize(rows, cols); - } - - /** Constructs a sparse matrix from the sparse expression \a other */ - template - inline SparseMatrix(const SparseMatrixBase& other) - : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) - { - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - check_template_parameters(); - *this = other.derived(); - } - - /** Constructs a sparse matrix from the sparse selfadjoint view \a other */ - template - inline SparseMatrix(const SparseSelfAdjointView& other) - : m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) - { - check_template_parameters(); - *this = other; - } - - /** Copy constructor (it performs a deep copy) */ - inline 
SparseMatrix(const SparseMatrix& other) - : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) - { - check_template_parameters(); - *this = other.derived(); - } - - /** \brief Copy constructor with in-place evaluation */ - template - SparseMatrix(const ReturnByValue& other) - : Base(), m_outerSize(0), m_innerSize(0), m_outerIndex(0), m_innerNonZeros(0) - { - check_template_parameters(); - initAssignment(other); - other.evalTo(*this); - } - - /** Swaps the content of two sparse matrices of the same type. - * This is a fast operation that simply swaps the underlying pointers and parameters. */ - inline void swap(SparseMatrix& other) - { - //EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: swap\n"); - std::swap(m_outerIndex, other.m_outerIndex); - std::swap(m_innerSize, other.m_innerSize); - std::swap(m_outerSize, other.m_outerSize); - std::swap(m_innerNonZeros, other.m_innerNonZeros); - m_data.swap(other.m_data); - } - - /** Sets *this to the identity matrix. - * This function also turns the matrix into compressed mode, and drop any reserved memory. 
*/ - inline void setIdentity() - { - eigen_assert(rows() == cols() && "ONLY FOR SQUARED MATRICES"); - this->m_data.resize(rows()); - Eigen::Map >(this->m_data.indexPtr(), rows()).setLinSpaced(0, rows()-1); - Eigen::Map >(this->m_data.valuePtr(), rows()).setOnes(); - Eigen::Map >(this->m_outerIndex, rows()+1).setLinSpaced(0, rows()); - std::free(m_innerNonZeros); - m_innerNonZeros = 0; - } - inline SparseMatrix& operator=(const SparseMatrix& other) - { - if (other.isRValue()) - { - swap(other.const_cast_derived()); - } - else if(this!=&other) - { - initAssignment(other); - if(other.isCompressed()) - { - memcpy(m_outerIndex, other.m_outerIndex, (m_outerSize+1)*sizeof(Index)); - m_data = other.m_data; - } - else - { - Base::operator=(other); - } - } - return *this; - } - - #ifndef EIGEN_PARSED_BY_DOXYGEN - template - inline SparseMatrix& operator=(const SparseSparseProduct& product) - { return Base::operator=(product); } - - template - inline SparseMatrix& operator=(const ReturnByValue& other) - { - initAssignment(other); - return Base::operator=(other.derived()); - } - - template - inline SparseMatrix& operator=(const EigenBase& other) - { return Base::operator=(other.derived()); } - #endif - - template - EIGEN_DONT_INLINE SparseMatrix& operator=(const SparseMatrixBase& other); - - friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m) - { - EIGEN_DBG_SPARSE( - s << "Nonzero entries:\n"; - if(m.isCompressed()) - for (Index i=0; i&>(m); - return s; - } - - /** Destructor */ - inline ~SparseMatrix() - { - std::free(m_outerIndex); - std::free(m_innerNonZeros); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** Overloaded for performance */ - Scalar sum() const; -#endif - -# ifdef EIGEN_SPARSEMATRIX_PLUGIN -# include EIGEN_SPARSEMATRIX_PLUGIN -# endif - -protected: - - template - void initAssignment(const Other& other) - { - resize(other.rows(), other.cols()); - if(m_innerNonZeros) - { - std::free(m_innerNonZeros); - m_innerNonZeros = 0; - } - } - - /** 
\internal - * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertCompressed(Index row, Index col); - - /** \internal - * A vector object that is equal to 0 everywhere but v at the position i */ - class SingletonVector - { - Index m_index; - Index m_value; - public: - typedef Index value_type; - SingletonVector(Index i, Index v) - : m_index(i), m_value(v) - {} - - Index operator[](Index i) const { return i==m_index ? m_value : 0; } - }; - - /** \internal - * \sa insert(Index,Index) */ - EIGEN_DONT_INLINE Scalar& insertUncompressed(Index row, Index col); - -public: - /** \internal - * \sa insert(Index,Index) */ - EIGEN_STRONG_INLINE Scalar& insertBackUncompressed(Index row, Index col) - { - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - eigen_assert(!isCompressed()); - eigen_assert(m_innerNonZeros[outer]<=(m_outerIndex[outer+1] - m_outerIndex[outer])); - - Index p = m_outerIndex[outer] + m_innerNonZeros[outer]++; - m_data.index(p) = inner; - return (m_data.value(p) = 0); - } - -private: - static void check_template_parameters() - { - EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); - EIGEN_STATIC_ASSERT((Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS); - } - - struct default_prunning_func { - default_prunning_func(const Scalar& ref, const RealScalar& eps) : reference(ref), epsilon(eps) {} - inline bool operator() (const Index&, const Index&, const Scalar& value) const - { - return !internal::isMuchSmallerThan(value, reference, epsilon); - } - Scalar reference; - RealScalar epsilon; - }; -}; - -template -class SparseMatrix::InnerIterator -{ - public: - InnerIterator(const SparseMatrix& mat, Index outer) - : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer), m_id(mat.m_outerIndex[outer]) - { - if(mat.isCompressed()) - m_end = mat.m_outerIndex[outer+1]; - else - m_end = m_id + mat.m_innerNonZeros[outer]; - } - - inline InnerIterator& 
operator++() { m_id++; return *this; } - - inline const Scalar& value() const { return m_values[m_id]; } - inline Scalar& valueRef() { return const_cast(m_values[m_id]); } - - inline Index index() const { return m_indices[m_id]; } - inline Index outer() const { return m_outer; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - inline operator bool() const { return (m_id < m_end); } - - protected: - const Scalar* m_values; - const Index* m_indices; - const Index m_outer; - Index m_id; - Index m_end; -}; - -template -class SparseMatrix::ReverseInnerIterator -{ - public: - ReverseInnerIterator(const SparseMatrix& mat, Index outer) - : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer), m_start(mat.m_outerIndex[outer]) - { - if(mat.isCompressed()) - m_id = mat.m_outerIndex[outer+1]; - else - m_id = m_start + mat.m_innerNonZeros[outer]; - } - - inline ReverseInnerIterator& operator--() { --m_id; return *this; } - - inline const Scalar& value() const { return m_values[m_id-1]; } - inline Scalar& valueRef() { return const_cast(m_values[m_id-1]); } - - inline Index index() const { return m_indices[m_id-1]; } - inline Index outer() const { return m_outer; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? 
index() : m_outer; } - - inline operator bool() const { return (m_id > m_start); } - - protected: - const Scalar* m_values; - const Index* m_indices; - const Index m_outer; - Index m_id; - const Index m_start; -}; - -namespace internal { - -template -void set_from_triplets(const InputIterator& begin, const InputIterator& end, SparseMatrixType& mat, int Options = 0) -{ - EIGEN_UNUSED_VARIABLE(Options); - enum { IsRowMajor = SparseMatrixType::IsRowMajor }; - typedef typename SparseMatrixType::Scalar Scalar; - typedef typename SparseMatrixType::Index Index; - SparseMatrix trMat(mat.rows(),mat.cols()); - - if(begin!=end) - { - // pass 1: count the nnz per inner-vector - Matrix wi(trMat.outerSize()); - wi.setZero(); - for(InputIterator it(begin); it!=end; ++it) - { - eigen_assert(it->row()>=0 && it->row()col()>=0 && it->col()col() : it->row())++; - } - - // pass 2: insert all the elements into trMat - trMat.reserve(wi); - for(InputIterator it(begin); it!=end; ++it) - trMat.insertBackUncompressed(it->row(),it->col()) = it->value(); - - // pass 3: - trMat.sumupDuplicates(); - } - - // pass 4: transposed copy -> implicit sorting - mat = trMat; -} - -} - - -/** Fill the matrix \c *this with the list of \em triplets defined by the iterator range \a begin - \a end. - * - * A \em triplet is a tuple (i,j,value) defining a non-zero element. - * The input list of triplets does not have to be sorted, and can contains duplicated elements. - * In any case, the result is a \b sorted and \b compressed sparse matrix where the duplicates have been summed up. - * This is a \em O(n) operation, with \em n the number of triplet elements. - * The initial contents of \c *this is destroyed. - * The matrix \c *this must be properly resized beforehand using the SparseMatrix(Index,Index) constructor, - * or the resize(Index,Index) method. The sizes are not extracted from the triplet list. 
- * - * The \a InputIterators value_type must provide the following interface: - * \code - * Scalar value() const; // the value - * Scalar row() const; // the row index i - * Scalar col() const; // the column index j - * \endcode - * See for instance the Eigen::Triplet template class. - * - * Here is a typical usage example: - * \code - typedef Triplet T; - std::vector tripletList; - triplets.reserve(estimation_of_entries); - for(...) - { - // ... - tripletList.push_back(T(i,j,v_ij)); - } - SparseMatrixType m(rows,cols); - m.setFromTriplets(tripletList.begin(), tripletList.end()); - // m is ready to go! - * \endcode - * - * \warning The list of triplets is read multiple times (at least twice). Therefore, it is not recommended to define - * an abstract iterator over a complex data-structure that would be expensive to evaluate. The triplets should rather - * be explicitely stored into a std::vector for instance. - */ -template -template -void SparseMatrix::setFromTriplets(const InputIterators& begin, const InputIterators& end) -{ - internal::set_from_triplets(begin, end, *this); -} - -/** \internal */ -template -void SparseMatrix::sumupDuplicates() -{ - eigen_assert(!isCompressed()); - // TODO, in practice we should be able to use m_innerNonZeros for that task - Matrix wi(innerSize()); - wi.fill(-1); - Index count = 0; - // for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers - for(Index j=0; j=start) - { - // we already meet this entry => accumulate it - m_data.value(wi(i)) += m_data.value(k); - } - else - { - m_data.value(count) = m_data.value(k); - m_data.index(count) = m_data.index(k); - wi(i) = count; - ++count; - } - } - m_outerIndex[j] = start; - } - m_outerIndex[m_outerSize] = count; - - // turn the matrix into compressed form - std::free(m_innerNonZeros); - m_innerNonZeros = 0; - m_data.resize(m_outerIndex[m_outerSize]); -} - -template -template -EIGEN_DONT_INLINE SparseMatrix& 
SparseMatrix::operator=(const SparseMatrixBase& other) -{ - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - const bool needToTranspose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - if (needToTranspose) - { - // two passes algorithm: - // 1 - compute the number of coeffs per dest inner vector - // 2 - do the actual copy/eval - // Since each coeff of the rhs has to be evaluated twice, let's evaluate it if needed - typedef typename internal::nested::type OtherCopy; - typedef typename internal::remove_all::type _OtherCopy; - OtherCopy otherCopy(other.derived()); - - SparseMatrix dest(other.rows(),other.cols()); - Eigen::Map > (dest.m_outerIndex,dest.outerSize()).setZero(); - - // pass 1 - // FIXME the above copy could be merged with that pass - for (Index j=0; j positions(dest.outerSize()); - for (Index j=0; jswap(dest); - return *this; - } - else - { - if(other.isRValue()) - initAssignment(other.derived()); - // there is no special optimization - return Base::operator=(other.derived()); - } -} - -template -EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertUncompressed(Index row, Index col) -{ - eigen_assert(!isCompressed()); - - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? 
col : row; - - Index room = m_outerIndex[outer+1] - m_outerIndex[outer]; - Index innerNNZ = m_innerNonZeros[outer]; - if(innerNNZ>=room) - { - // this inner vector is full, we need to reallocate the whole buffer :( - reserve(SingletonVector(outer,std::max(2,innerNNZ))); - } - - Index startId = m_outerIndex[outer]; - Index p = startId + m_innerNonZeros[outer]; - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - eigen_assert((p<=startId || m_data.index(p-1)!=inner) && "you cannot insert an element that already exist, you must call coeffRef to this end"); - - m_innerNonZeros[outer]++; - - m_data.index(p) = inner; - return (m_data.value(p) = 0); -} - -template -EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_Index>::Scalar& SparseMatrix<_Scalar,_Options,_Index>::insertCompressed(Index row, Index col) -{ - eigen_assert(isCompressed()); - - const Index outer = IsRowMajor ? row : col; - const Index inner = IsRowMajor ? col : row; - - Index previousOuter = outer; - if (m_outerIndex[outer+1]==0) - { - // we start a new inner vector - while (previousOuter>=0 && m_outerIndex[previousOuter]==0) - { - m_outerIndex[previousOuter] = static_cast(m_data.size()); - --previousOuter; - } - m_outerIndex[outer+1] = m_outerIndex[outer]; - } - - // here we have to handle the tricky case where the outerIndex array - // starts with: [ 0 0 0 0 0 1 ...] and we are inserted in, e.g., - // the 2nd inner vector... 
- bool isLastVec = (!(previousOuter==-1 && m_data.size()!=0)) - && (size_t(m_outerIndex[outer+1]) == m_data.size()); - - size_t startId = m_outerIndex[outer]; - // FIXME let's make sure sizeof(long int) == sizeof(size_t) - size_t p = m_outerIndex[outer+1]; - ++m_outerIndex[outer+1]; - - double reallocRatio = 1; - if (m_data.allocatedSize()<=m_data.size()) - { - // if there is no preallocated memory, let's reserve a minimum of 32 elements - if (m_data.size()==0) - { - m_data.reserve(32); - } - else - { - // we need to reallocate the data, to reduce multiple reallocations - // we use a smart resize algorithm based on the current filling ratio - // in addition, we use double to avoid integers overflows - double nnzEstimate = double(m_outerIndex[outer])*double(m_outerSize)/double(outer+1); - reallocRatio = (nnzEstimate-double(m_data.size()))/double(m_data.size()); - // furthermore we bound the realloc ratio to: - // 1) reduce multiple minor realloc when the matrix is almost filled - // 2) avoid to allocate too much memory when the matrix is almost empty - reallocRatio = (std::min)((std::max)(reallocRatio,1.5),8.); - } - } - m_data.resize(m_data.size()+1,reallocRatio); - - if (!isLastVec) - { - if (previousOuter==-1) - { - // oops wrong guess. 
- // let's correct the outer offsets - for (Index k=0; k<=(outer+1); ++k) - m_outerIndex[k] = 0; - Index k=outer+1; - while(m_outerIndex[k]==0) - m_outerIndex[k++] = 1; - while (k<=m_outerSize && m_outerIndex[k]!=0) - m_outerIndex[k++]++; - p = 0; - --k; - k = m_outerIndex[k]-1; - while (k>0) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } - else - { - // we are not inserting into the last inner vec - // update outer indices: - Index j = outer+2; - while (j<=m_outerSize && m_outerIndex[j]!=0) - m_outerIndex[j++]++; - --j; - // shift data of last vecs: - Index k = m_outerIndex[j]-1; - while (k>=Index(p)) - { - m_data.index(k) = m_data.index(k-1); - m_data.value(k) = m_data.value(k-1); - k--; - } - } - } - - while ( (p > startId) && (m_data.index(p-1) > inner) ) - { - m_data.index(p) = m_data.index(p-1); - m_data.value(p) = m_data.value(p-1); - --p; - } - - m_data.index(p) = inner; - return (m_data.value(p) = 0); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSEMATRIX_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrixBase.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrixBase.h index 6f4a47cf5..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrixBase.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseMatrixBase.h @@ -1,462 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSEMATRIXBASE_H -#define EIGEN_SPARSEMATRIXBASE_H - -namespace Eigen { - -/** \ingroup SparseCore_Module - * - * \class SparseMatrixBase - * - * \brief Base class of any sparse matrices or sparse expressions - * - * \tparam Derived - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEMATRIXBASE_PLUGIN. - */ -template class SparseMatrixBase -#ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real, - EigenBase > -#else - : public EigenBase -#endif // not EIGEN_PARSED_BY_DOXYGEN -{ - public: - - typedef typename internal::traits::Scalar Scalar; - typedef typename internal::packet_traits::type PacketScalar; - typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; - typedef typename internal::traits::Index StorageIndex; - typedef typename internal::add_const_on_value_type_if_arithmetic< - typename internal::packet_traits::type - >::type PacketReturnType; - - typedef SparseMatrixBase StorageBaseType; - - template - Derived& operator=(const EigenBase &other) - { - other.derived().evalTo(derived()); - return derived(); - } - - enum { - - RowsAtCompileTime = internal::traits::RowsAtCompileTime, - /**< The number of rows at compile-time. This is just a copy of the value provided - * by the \a Derived type. If a value is not known at compile-time, - * it is set to the \a Dynamic constant. - * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ - - ColsAtCompileTime = internal::traits::ColsAtCompileTime, - /**< The number of columns at compile-time. This is just a copy of the value provided - * by the \a Derived type. If a value is not known at compile-time, - * it is set to the \a Dynamic constant. 
- * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ - - - SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, - internal::traits::ColsAtCompileTime>::ret), - /**< This is equal to the number of coefficients, i.e. the number of - * rows times the number of columns, or to \a Dynamic if this is not - * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ - - MaxRowsAtCompileTime = RowsAtCompileTime, - MaxColsAtCompileTime = ColsAtCompileTime, - - MaxSizeAtCompileTime = (internal::size_at_compile_time::ret), - - IsVectorAtCompileTime = RowsAtCompileTime == 1 || ColsAtCompileTime == 1, - /**< This is set to true if either the number of rows or the number of - * columns is known at compile-time to be equal to 1. Indeed, in that case, - * we are dealing with a column-vector (if there is only one column) or with - * a row-vector (if there is only one row). */ - - Flags = internal::traits::Flags, - /**< This stores expression \ref flags flags which may or may not be inherited by new expressions - * constructed from this one. See the \ref flags "list of flags". - */ - - CoeffReadCost = internal::traits::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. - */ - - IsRowMajor = Flags&RowMajorBit ? 1 : 0, - - InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) - : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), - - #ifndef EIGEN_PARSED_BY_DOXYGEN - _HasDirectAccess = (int(Flags)&DirectAccessBit) ? 1 : 0 // workaround sunCC - #endif - }; - - /** \internal the return type of MatrixBase::adjoint() */ - typedef typename internal::conditional::IsComplex, - CwiseUnaryOp, Eigen::Transpose >, - Transpose - >::type AdjointReturnType; - - - typedef SparseMatrix PlainObject; - - -#ifndef EIGEN_PARSED_BY_DOXYGEN - /** This is the "real scalar" type; if the \a Scalar type is already real numbers - * (e.g. 
int, float or double) then \a RealScalar is just the same as \a Scalar. If - * \a Scalar is \a std::complex then RealScalar is \a T. - * - * \sa class NumTraits - */ - typedef typename NumTraits::Real RealScalar; - - /** \internal the return type of coeff() - */ - typedef typename internal::conditional<_HasDirectAccess, const Scalar&, Scalar>::type CoeffReturnType; - - /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Matrix > ConstantReturnType; - - /** type of the equivalent square matrix */ - typedef Matrix SquareMatrixType; - - inline const Derived& derived() const { return *static_cast(this); } - inline Derived& derived() { return *static_cast(this); } - inline Derived& const_cast_derived() const - { return *static_cast(const_cast(this)); } - - typedef internal::special_scalar_op_base > Base; - using Base::operator*; -#endif // not EIGEN_PARSED_BY_DOXYGEN - -#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase -# include "../plugins/CommonCwiseUnaryOps.h" -# include "../plugins/CommonCwiseBinaryOps.h" -# include "../plugins/MatrixCwiseUnaryOps.h" -# include "../plugins/MatrixCwiseBinaryOps.h" -# include "../plugins/BlockMethods.h" -# ifdef EIGEN_SPARSEMATRIXBASE_PLUGIN -# include EIGEN_SPARSEMATRIXBASE_PLUGIN -# endif -# undef EIGEN_CURRENT_STORAGE_BASE_CLASS -#undef EIGEN_CURRENT_STORAGE_BASE_CLASS - - /** \returns the number of rows. \sa cols() */ - inline Index rows() const { return derived().rows(); } - /** \returns the number of columns. \sa rows() */ - inline Index cols() const { return derived().cols(); } - /** \returns the number of coefficients, which is \a rows()*cols(). - * \sa rows(), cols(). */ - inline Index size() const { return rows() * cols(); } - /** \returns the number of nonzero coefficients which is in practice the number - * of stored coefficients. 
*/ - inline Index nonZeros() const { return derived().nonZeros(); } - /** \returns true if either the number of rows or the number of columns is equal to 1. - * In other words, this function returns - * \code rows()==1 || cols()==1 \endcode - * \sa rows(), cols(), IsVectorAtCompileTime. */ - inline bool isVector() const { return rows()==1 || cols()==1; } - /** \returns the size of the storage major dimension, - * i.e., the number of columns for a columns major matrix, and the number of rows otherwise */ - Index outerSize() const { return (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); } - /** \returns the size of the inner dimension according to the storage order, - * i.e., the number of rows for a columns major matrix, and the number of cols otherwise */ - Index innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); } - - bool isRValue() const { return m_isRValue; } - Derived& markAsRValue() { m_isRValue = true; return derived(); } - - SparseMatrixBase() : m_isRValue(false) { /* TODO check flags */ } - - - template - Derived& operator=(const ReturnByValue& other) - { - other.evalTo(derived()); - return derived(); - } - - - template - inline Derived& operator=(const SparseMatrixBase& other) - { - return assign(other.derived()); - } - - inline Derived& operator=(const Derived& other) - { -// if (other.isRValue()) -// derived().swap(other.const_cast_derived()); -// else - return assign(other.derived()); - } - - protected: - - template - inline Derived& assign(const OtherDerived& other) - { - const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - const Index outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? 
other.rows() : other.cols(); - if ((!transpose) && other.isRValue()) - { - // eval without temporary - derived().resize(other.rows(), other.cols()); - derived().setZero(); - derived().reserve((std::max)(this->rows(),this->cols())*2); - for (Index j=0; j - inline void assignGeneric(const OtherDerived& other) - { - //const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit); - eigen_assert(( ((internal::traits::SupportedAccessPatterns&OuterRandomAccessPattern)==OuterRandomAccessPattern) || - (!((Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit)))) && - "the transpose operation is supposed to be handled in SparseMatrix::operator="); - - enum { Flip = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit) }; - - const Index outerSize = other.outerSize(); - //typedef typename internal::conditional, Derived>::type TempType; - // thanks to shallow copies, we always eval to a tempary - Derived temp(other.rows(), other.cols()); - - temp.reserve((std::max)(this->rows(),this->cols())*2); - for (Index j=0; j - inline Derived& operator=(const SparseSparseProduct& product); - - friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m) - { - typedef typename Derived::Nested Nested; - typedef typename internal::remove_all::type NestedCleaned; - - if (Flags&RowMajorBit) - { - const Nested nm(m.derived()); - for (Index row=0; row trans = m; - s << static_cast >&>(trans); - } - } - return s; - } - - template - Derived& operator+=(const SparseMatrixBase& other); - template - Derived& operator-=(const SparseMatrixBase& other); - - Derived& operator*=(const Scalar& other); - Derived& operator/=(const Scalar& other); - - template struct CwiseProductDenseReturnType { - typedef CwiseBinaryOp::Scalar, - typename internal::traits::Scalar - >::ReturnType>, - const Derived, - const OtherDerived - > Type; - }; - - template - EIGEN_STRONG_INLINE const typename CwiseProductDenseReturnType::Type - cwiseProduct(const MatrixBase &other) 
const; - - // sparse * sparse - template - const typename SparseSparseProductReturnType::Type - operator*(const SparseMatrixBase &other) const; - - // sparse * diagonal - template - const SparseDiagonalProduct - operator*(const DiagonalBase &other) const; - - // diagonal * sparse - template friend - const SparseDiagonalProduct - operator*(const DiagonalBase &lhs, const SparseMatrixBase& rhs) - { return SparseDiagonalProduct(lhs.derived(), rhs.derived()); } - - /** dense * sparse (return a dense object unless it is an outer product) */ - template friend - const typename DenseSparseProductReturnType::Type - operator*(const MatrixBase& lhs, const Derived& rhs) - { return typename DenseSparseProductReturnType::Type(lhs.derived(),rhs); } - - /** sparse * dense (returns a dense object unless it is an outer product) */ - template - const typename SparseDenseProductReturnType::Type - operator*(const MatrixBase &other) const - { return typename SparseDenseProductReturnType::Type(derived(), other.derived()); } - - /** \returns an expression of P H P^-1 where H is the matrix represented by \c *this */ - SparseSymmetricPermutationProduct twistedBy(const PermutationMatrix& perm) const - { - return SparseSymmetricPermutationProduct(derived(), perm); - } - - template - Derived& operator*=(const SparseMatrixBase& other); - - #ifdef EIGEN2_SUPPORT - // deprecated - template - typename internal::plain_matrix_type_column_major::type - solveTriangular(const MatrixBase& other) const; - - // deprecated - template - void solveTriangularInPlace(MatrixBase& other) const; - #endif // EIGEN2_SUPPORT - - template - inline const SparseTriangularView triangularView() const; - - template inline const SparseSelfAdjointView selfadjointView() const; - template inline SparseSelfAdjointView selfadjointView(); - - template Scalar dot(const MatrixBase& other) const; - template Scalar dot(const SparseMatrixBase& other) const; - RealScalar squaredNorm() const; - RealScalar norm() const; - RealScalar 
blueNorm() const; - - Transpose transpose() { return derived(); } - const Transpose transpose() const { return derived(); } - const AdjointReturnType adjoint() const { return transpose(); } - - // inner-vector - typedef Block InnerVectorReturnType; - typedef Block ConstInnerVectorReturnType; - InnerVectorReturnType innerVector(Index outer); - const ConstInnerVectorReturnType innerVector(Index outer) const; - - // set of inner-vectors - typedef Block InnerVectorsReturnType; - typedef Block ConstInnerVectorsReturnType; - InnerVectorsReturnType innerVectors(Index outerStart, Index outerSize); - const ConstInnerVectorsReturnType innerVectors(Index outerStart, Index outerSize) const; - - /** \internal use operator= */ - template - void evalTo(MatrixBase& dst) const - { - dst.setZero(); - for (Index j=0; j toDense() const - { - return derived(); - } - - template - bool isApprox(const SparseMatrixBase& other, - const RealScalar& prec = NumTraits::dummy_precision()) const - { return toDense().isApprox(other.toDense(),prec); } - - template - bool isApprox(const MatrixBase& other, - const RealScalar& prec = NumTraits::dummy_precision()) const - { return toDense().isApprox(other,prec); } - - /** \returns the matrix or vector obtained by evaluating this expression. - * - * Notice that in the case of a plain matrix or vector (not an expression) this function just returns - * a const reference, in order to avoid a useless copy. 
- */ - inline const typename internal::eval::type eval() const - { return typename internal::eval::type(derived()); } - - Scalar sum() const; - - protected: - - bool m_isRValue; -}; - -} // end namespace Eigen - -#endif // EIGEN_SPARSEMATRIXBASE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparsePermutation.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparsePermutation.h index 75e210009..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparsePermutation.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparsePermutation.h @@ -1,148 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_PERMUTATION_H -#define EIGEN_SPARSE_PERMUTATION_H - -// This file implements sparse * permutation products - -namespace Eigen { - -namespace internal { - -template -struct traits > -{ - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixTypeNestedCleaned::Scalar Scalar; - typedef typename MatrixTypeNestedCleaned::Index Index; - enum { - SrcStorageOrder = MatrixTypeNestedCleaned::Flags&RowMajorBit ? RowMajor : ColMajor, - MoveOuter = SrcStorageOrder==RowMajor ? Side==OnTheLeft : Side==OnTheRight - }; - - typedef typename internal::conditional, - SparseMatrix >::type ReturnType; -}; - -template -struct permut_sparsematrix_product_retval - : public ReturnByValue > -{ - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixTypeNestedCleaned::Scalar Scalar; - typedef typename MatrixTypeNestedCleaned::Index Index; - - enum { - SrcStorageOrder = MatrixTypeNestedCleaned::Flags&RowMajorBit ? RowMajor : ColMajor, - MoveOuter = SrcStorageOrder==RowMajor ? 
Side==OnTheLeft : Side==OnTheRight - }; - - permut_sparsematrix_product_retval(const PermutationType& perm, const MatrixType& matrix) - : m_permutation(perm), m_matrix(matrix) - {} - - inline int rows() const { return m_matrix.rows(); } - inline int cols() const { return m_matrix.cols(); } - - template inline void evalTo(Dest& dst) const - { - if(MoveOuter) - { - SparseMatrix tmp(m_matrix.rows(), m_matrix.cols()); - Matrix sizes(m_matrix.outerSize()); - for(Index j=0; j tmp(m_matrix.rows(), m_matrix.cols()); - Matrix sizes(tmp.outerSize()); - sizes.setZero(); - PermutationMatrix perm; - if((Side==OnTheLeft) ^ Transposed) - perm = m_permutation; - else - perm = m_permutation.transpose(); - - for(Index j=0; j -inline const internal::permut_sparsematrix_product_retval, SparseDerived, OnTheRight, false> -operator*(const SparseMatrixBase& matrix, const PermutationBase& perm) -{ - return internal::permut_sparsematrix_product_retval, SparseDerived, OnTheRight, false>(perm, matrix.derived()); -} - -/** \returns the matrix with the permutation applied to the rows - */ -template -inline const internal::permut_sparsematrix_product_retval, SparseDerived, OnTheLeft, false> -operator*( const PermutationBase& perm, const SparseMatrixBase& matrix) -{ - return internal::permut_sparsematrix_product_retval, SparseDerived, OnTheLeft, false>(perm, matrix.derived()); -} - - - -/** \returns the matrix with the inverse permutation applied to the columns. - */ -template -inline const internal::permut_sparsematrix_product_retval, SparseDerived, OnTheRight, true> -operator*(const SparseMatrixBase& matrix, const Transpose >& tperm) -{ - return internal::permut_sparsematrix_product_retval, SparseDerived, OnTheRight, true>(tperm.nestedPermutation(), matrix.derived()); -} - -/** \returns the matrix with the inverse permutation applied to the rows. 
- */ -template -inline const internal::permut_sparsematrix_product_retval, SparseDerived, OnTheLeft, true> -operator*(const Transpose >& tperm, const SparseMatrixBase& matrix) -{ - return internal::permut_sparsematrix_product_retval, SparseDerived, OnTheLeft, true>(tperm.nestedPermutation(), matrix.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseProduct.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseProduct.h index cf7663070..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseProduct.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseProduct.h @@ -1,188 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSEPRODUCT_H -#define EIGEN_SPARSEPRODUCT_H - -namespace Eigen { - -template -struct SparseSparseProductReturnType -{ - typedef typename internal::traits::Scalar Scalar; - typedef typename internal::traits::Index Index; - enum { - LhsRowMajor = internal::traits::Flags & RowMajorBit, - RhsRowMajor = internal::traits::Flags & RowMajorBit, - TransposeRhs = (!LhsRowMajor) && RhsRowMajor, - TransposeLhs = LhsRowMajor && (!RhsRowMajor) - }; - - typedef typename internal::conditional, - typename internal::nested::type>::type LhsNested; - - typedef typename internal::conditional, - typename internal::nested::type>::type RhsNested; - - typedef SparseSparseProduct Type; -}; - -namespace internal { -template -struct traits > -{ - typedef MatrixXpr XprKind; - // clean the nested types: - typedef typename remove_all::type _LhsNested; - typedef typename remove_all::type _RhsNested; - typedef typename _LhsNested::Scalar Scalar; - typedef typename promote_index_type::Index, - typename traits<_RhsNested>::Index>::type Index; - - enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - - RowsAtCompileTime = _LhsNested::RowsAtCompileTime, - ColsAtCompileTime = _RhsNested::ColsAtCompileTime, - MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime, - MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime, - - InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime), - - EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit), - - RemovedBits = ~(EvalToRowMajor ? 
0 : RowMajorBit), - - Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits) - | EvalBeforeAssigningBit - | EvalBeforeNestingBit, - - CoeffReadCost = Dynamic - }; - - typedef Sparse StorageKind; -}; - -} // end namespace internal - -template -class SparseSparseProduct : internal::no_assignment_operator, - public SparseMatrixBase > -{ - public: - - typedef SparseMatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(SparseSparseProduct) - - private: - - typedef typename internal::traits::_LhsNested _LhsNested; - typedef typename internal::traits::_RhsNested _RhsNested; - - public: - - template - EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs) - : m_lhs(lhs), m_rhs(rhs), m_tolerance(0), m_conservative(true) - { - init(); - } - - template - EIGEN_STRONG_INLINE SparseSparseProduct(const Lhs& lhs, const Rhs& rhs, const RealScalar& tolerance) - : m_lhs(lhs), m_rhs(rhs), m_tolerance(tolerance), m_conservative(false) - { - init(); - } - - SparseSparseProduct pruned(const Scalar& reference = 0, const RealScalar& epsilon = NumTraits::dummy_precision()) const - { - using std::abs; - return SparseSparseProduct(m_lhs,m_rhs,abs(reference)*epsilon); - } - - template - void evalTo(Dest& result) const - { - if(m_conservative) - internal::conservative_sparse_sparse_product_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result); - else - internal::sparse_sparse_product_with_pruning_selector<_LhsNested, _RhsNested, Dest>::run(lhs(),rhs(),result,m_tolerance); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } - - EIGEN_STRONG_INLINE const _LhsNested& lhs() const { return m_lhs; } - EIGEN_STRONG_INLINE const _RhsNested& rhs() const { return m_rhs; } - - protected: - void init() - { - eigen_assert(m_lhs.cols() == m_rhs.rows()); - - enum { - ProductIsValid = _LhsNested::ColsAtCompileTime==Dynamic - || _RhsNested::RowsAtCompileTime==Dynamic - || 
int(_LhsNested::ColsAtCompileTime)==int(_RhsNested::RowsAtCompileTime), - AreVectors = _LhsNested::IsVectorAtCompileTime && _RhsNested::IsVectorAtCompileTime, - SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(_LhsNested,_RhsNested) - }; - // note to the lost user: - // * for a dot product use: v1.dot(v2) - // * for a coeff-wise product use: v1.cwise()*v2 - EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) - EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) - EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - } - - LhsNested m_lhs; - RhsNested m_rhs; - RealScalar m_tolerance; - bool m_conservative; -}; - -// sparse = sparse * sparse -template -template -inline Derived& SparseMatrixBase::operator=(const SparseSparseProduct& product) -{ - product.evalTo(derived()); - return derived(); -} - -/** \returns an expression of the product of two sparse matrices. - * By default a conservative product preserving the symbolic non zeros is performed. - * The automatic pruning of the small values can be achieved by calling the pruned() function - * in which case a totally different product algorithm is employed: - * \code - * C = (A*B).pruned(); // supress numerical zeros (exact) - * C = (A*B).pruned(ref); - * C = (A*B).pruned(ref,epsilon); - * \endcode - * where \c ref is a meaningful non zero reference value. 
- * */ -template -template -inline const typename SparseSparseProductReturnType::Type -SparseMatrixBase::operator*(const SparseMatrixBase &other) const -{ - return typename SparseSparseProductReturnType::Type(derived(), other.derived()); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSEPRODUCT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseRedux.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseRedux.h index 51ed9aeb1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseRedux.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseRedux.h @@ -1,48 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSEREDUX_H -#define EIGEN_SPARSEREDUX_H - -namespace Eigen { - -template -typename internal::traits::Scalar -SparseMatrixBase::sum() const -{ - eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); - Scalar res(0); - for (Index j=0; j -typename internal::traits >::Scalar -SparseMatrix<_Scalar,_Options,_Index>::sum() const -{ - eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); - if(this->isCompressed()) - return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); - else - return Base::sum(); -} - -template -typename internal::traits >::Scalar -SparseVector<_Scalar,_Options,_Index>::sum() const -{ - eigen_assert(rows()>0 && cols()>0 && "you are using a non initialized matrix"); - return Matrix::Map(m_data.valuePtr(), m_data.size()).sum(); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSEREDUX_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSelfAdjointView.h 
b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSelfAdjointView.h index 0eda96bc4..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -1,507 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_SELFADJOINTVIEW_H -#define EIGEN_SPARSE_SELFADJOINTVIEW_H - -namespace Eigen { - -/** \ingroup SparseCore_Module - * \class SparseSelfAdjointView - * - * \brief Pseudo expression to manipulate a triangular sparse matrix as a selfadjoint matrix. - * - * \param MatrixType the type of the dense matrix storing the coefficients - * \param UpLo can be either \c #Lower or \c #Upper - * - * This class is an expression of a sefladjoint matrix from a triangular part of a matrix - * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView() - * and most of the time this is the only way that it is used. 
- * - * \sa SparseMatrixBase::selfadjointView() - */ -template -class SparseSelfAdjointTimeDenseProduct; - -template -class DenseTimeSparseSelfAdjointProduct; - -namespace internal { - -template -struct traits > : traits { -}; - -template -void permute_symm_to_symm(const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::Index* perm = 0); - -template -void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::Index* perm = 0); - -} - -template class SparseSelfAdjointView - : public EigenBase > -{ - public: - - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - typedef Matrix VectorI; - typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_all::type _MatrixTypeNested; - - inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) - { - eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices"); - } - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - /** \internal \returns a reference to the nested matrix */ - const _MatrixTypeNested& matrix() const { return m_matrix; } - _MatrixTypeNested& matrix() { return m_matrix.const_cast_derived(); } - - /** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs. - * - * Note that there is no algorithmic advantage of performing such a product compared to a general sparse-sparse matrix product. - * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product. - */ - template - SparseSparseProduct - operator*(const SparseMatrixBase& rhs) const - { - return SparseSparseProduct(*this, rhs.derived()); - } - - /** \returns an expression of the matrix product between a sparse matrix \a lhs and a sparse self-adjoint matrix \a rhs. 
- * - * Note that there is no algorithmic advantage of performing such a product compared to a general sparse-sparse matrix product. - * Indeed, the SparseSelfadjointView operand is first copied into a temporary SparseMatrix before computing the product. - */ - template friend - SparseSparseProduct - operator*(const SparseMatrixBase& lhs, const SparseSelfAdjointView& rhs) - { - return SparseSparseProduct(lhs.derived(), rhs); - } - - /** Efficient sparse self-adjoint matrix times dense vector/matrix product */ - template - SparseSelfAdjointTimeDenseProduct - operator*(const MatrixBase& rhs) const - { - return SparseSelfAdjointTimeDenseProduct(m_matrix, rhs.derived()); - } - - /** Efficient dense vector/matrix times sparse self-adjoint matrix product */ - template friend - DenseTimeSparseSelfAdjointProduct - operator*(const MatrixBase& lhs, const SparseSelfAdjointView& rhs) - { - return DenseTimeSparseSelfAdjointProduct(lhs.derived(), rhs.m_matrix); - } - - /** Perform a symmetric rank K update of the selfadjoint matrix \c *this: - * \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix. - * - * \returns a reference to \c *this - * - * To perform \f$ this = this + \alpha ( u^* u ) \f$ you can simply - * call this function with u.adjoint(). 
- */ - template - SparseSelfAdjointView& rankUpdate(const SparseMatrixBase& u, const Scalar& alpha = Scalar(1)); - - /** \internal triggered by sparse_matrix = SparseSelfadjointView; */ - template void evalTo(SparseMatrix& _dest) const - { - internal::permute_symm_to_fullsymm(m_matrix, _dest); - } - - template void evalTo(DynamicSparseMatrix& _dest) const - { - // TODO directly evaluate into _dest; - SparseMatrix tmp(_dest.rows(),_dest.cols()); - internal::permute_symm_to_fullsymm(m_matrix, tmp); - _dest = tmp; - } - - /** \returns an expression of P H P^-1 */ - SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo> twistedBy(const PermutationMatrix& perm) const - { - return SparseSymmetricPermutationProduct<_MatrixTypeNested,UpLo>(m_matrix, perm); - } - - template - SparseSelfAdjointView& operator=(const SparseSymmetricPermutationProduct& permutedMatrix) - { - permutedMatrix.evalTo(*this); - return *this; - } - - - SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) - { - PermutationMatrix pnull; - return *this = src.twistedBy(pnull); - } - - template - SparseSelfAdjointView& operator=(const SparseSelfAdjointView& src) - { - PermutationMatrix pnull; - return *this = src.twistedBy(pnull); - } - - - // const SparseLLT llt() const; - // const SparseLDLT ldlt() const; - - protected: - - typename MatrixType::Nested m_matrix; - mutable VectorI m_countPerRow; - mutable VectorI m_countPerCol; -}; - -/*************************************************************************** -* Implementation of SparseMatrixBase methods -***************************************************************************/ - -template -template -const SparseSelfAdjointView SparseMatrixBase::selfadjointView() const -{ - return derived(); -} - -template -template -SparseSelfAdjointView SparseMatrixBase::selfadjointView() -{ - return derived(); -} - -/*************************************************************************** -* Implementation of SparseSelfAdjointView methods 
-***************************************************************************/ - -template -template -SparseSelfAdjointView& -SparseSelfAdjointView::rankUpdate(const SparseMatrixBase& u, const Scalar& alpha) -{ - SparseMatrix tmp = u * u.adjoint(); - if(alpha==Scalar(0)) - m_matrix.const_cast_derived() = tmp.template triangularView(); - else - m_matrix.const_cast_derived() += alpha * tmp.template triangularView(); - - return *this; -} - -/*************************************************************************** -* Implementation of sparse self-adjoint time dense matrix -***************************************************************************/ - -namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{ - typedef Dense StorageKind; -}; -} - -template -class SparseSelfAdjointTimeDenseProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(SparseSelfAdjointTimeDenseProduct) - - SparseSelfAdjointTimeDenseProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - EIGEN_ONLY_USED_FOR_DEBUG(alpha); - // TODO use alpha - eigen_assert(alpha==Scalar(1) && "alpha != 1 is not implemented yet, sorry"); - typedef typename internal::remove_all::type _Lhs; - typedef typename _Lhs::InnerIterator LhsInnerIterator; - enum { - LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit, - ProcessFirstHalf = - ((UpLo&(Upper|Lower))==(Upper|Lower)) - || ( (UpLo&Upper) && !LhsIsRowMajor) - || ( (UpLo&Lower) && LhsIsRowMajor), - ProcessSecondHalf = !ProcessFirstHalf - }; - for (Index j=0; j -struct traits > - : traits, Lhs, Rhs> > -{}; -} - -template -class DenseTimeSparseSelfAdjointProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(DenseTimeSparseSelfAdjointProduct) - - DenseTimeSparseSelfAdjointProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - {} - - template void scaleAndAddTo(Dest& /*dest*/, const Scalar& /*alpha*/) 
const - { - // TODO - } - - private: - DenseTimeSparseSelfAdjointProduct& operator=(const DenseTimeSparseSelfAdjointProduct&); -}; - -/*************************************************************************** -* Implementation of symmetric copies and permutations -***************************************************************************/ -namespace internal { - -template -struct traits > : traits { -}; - -template -void permute_symm_to_fullsymm(const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::Index* perm) -{ - typedef typename MatrixType::Index Index; - typedef typename MatrixType::Scalar Scalar; - typedef SparseMatrix Dest; - typedef Matrix VectorI; - - Dest& dest(_dest.derived()); - enum { - StorageOrderMatch = int(Dest::IsRowMajor) == int(MatrixType::IsRowMajor) - }; - - Index size = mat.rows(); - VectorI count; - count.resize(size); - count.setZero(); - dest.resize(size,size); - for(Index j = 0; jc) || ( UpLo==Upper && rc) || ( (UpLo&Upper)==Upper && r -void permute_symm_to_symm(const MatrixType& mat, SparseMatrix& _dest, const typename MatrixType::Index* perm) -{ - typedef typename MatrixType::Index Index; - typedef typename MatrixType::Scalar Scalar; - SparseMatrix& dest(_dest.derived()); - typedef Matrix VectorI; - enum { - SrcOrder = MatrixType::IsRowMajor ? RowMajor : ColMajor, - StorageOrderMatch = int(SrcOrder) == int(DstOrder), - DstUpLo = DstOrder==RowMajor ? (_DstUpLo==Upper ? Lower : Upper) : _DstUpLo, - SrcUpLo = SrcOrder==RowMajor ? (_SrcUpLo==Upper ? Lower : Upper) : _SrcUpLo - }; - - Index size = mat.rows(); - VectorI count(size); - count.setZero(); - dest.resize(size,size); - for(Index j = 0; jj)) - continue; - - Index ip = perm ? perm[i] : i; - count[int(DstUpLo)==int(Lower) ? (std::min)(ip,jp) : (std::max)(ip,jp)]++; - } - } - dest.outerIndexPtr()[0] = 0; - for(Index j=0; jj)) - continue; - - Index jp = perm ? perm[j] : j; - Index ip = perm? perm[i] : i; - - Index k = count[int(DstUpLo)==int(Lower) ? 
(std::min)(ip,jp) : (std::max)(ip,jp)]++; - dest.innerIndexPtr()[k] = int(DstUpLo)==int(Lower) ? (std::max)(ip,jp) : (std::min)(ip,jp); - - if(!StorageOrderMatch) std::swap(ip,jp); - if( ((int(DstUpLo)==int(Lower) && ipjp))) - dest.valuePtr()[k] = numext::conj(it.value()); - else - dest.valuePtr()[k] = it.value(); - } - } -} - -} - -template -class SparseSymmetricPermutationProduct - : public EigenBase > -{ - public: - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; - protected: - typedef PermutationMatrix Perm; - public: - typedef Matrix VectorI; - typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_all::type _MatrixTypeNested; - - SparseSymmetricPermutationProduct(const MatrixType& mat, const Perm& perm) - : m_matrix(mat), m_perm(perm) - {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - template - void evalTo(SparseMatrix& _dest) const - { -// internal::permute_symm_to_fullsymm(m_matrix,_dest,m_perm.indices().data()); - SparseMatrix tmp; - internal::permute_symm_to_fullsymm(m_matrix,tmp,m_perm.indices().data()); - _dest = tmp; - } - - template void evalTo(SparseSelfAdjointView& dest) const - { - internal::permute_symm_to_symm(m_matrix,dest.matrix(),m_perm.indices().data()); - } - - protected: - MatrixTypeNested m_matrix; - const Perm& m_perm; - -}; - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_SELFADJOINTVIEW_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index 55b84a4eb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -1,150 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2008-2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H -#define EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H - -namespace Eigen { - -namespace internal { - - -// perform a pseudo in-place sparse * sparse product assuming all matrices are col major -template -static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, const typename ResultType::RealScalar& tolerance) -{ - // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res); - - typedef typename remove_all::type::Scalar Scalar; - typedef typename remove_all::type::Index Index; - - // make sure to call innerSize/outerSize since we fake the storage order. - Index rows = lhs.innerSize(); - Index cols = rhs.outerSize(); - //Index size = lhs.outerSize(); - eigen_assert(lhs.outerSize() == rhs.innerSize()); - - // allocate a temporary buffer - AmbiVector tempVector(rows); - - // estimate the number of non zero entries - // given a rhs column containing Y non zeros, we assume that the respective Y columns - // of the lhs differs in average of one non zeros, thus the number of non zeros for - // the product of a rhs column with the lhs is X+Y where X is the average number of non zero - // per column of the lhs. 
- // Therefore, we have nnz(lhs*rhs) = nnz(lhs) + nnz(rhs) - Index estimated_nnz_prod = lhs.nonZeros() + rhs.nonZeros(); - - // mimics a resizeByInnerOuter: - if(ResultType::IsRowMajor) - res.resize(cols, rows); - else - res.resize(rows, cols); - - res.reserve(estimated_nnz_prod); - double ratioColRes = double(estimated_nnz_prod)/(double(lhs.rows())*double(rhs.cols())); - for (Index j=0; j::Iterator it(tempVector,tolerance); it; ++it) - res.insertBackByOuterInner(j,it.index()) = it.value(); - } - res.finalize(); -} - -template::Flags&RowMajorBit, - int RhsStorageOrder = traits::Flags&RowMajorBit, - int ResStorageOrder = traits::Flags&RowMajorBit> -struct sparse_sparse_product_with_pruning_selector; - -template -struct sparse_sparse_product_with_pruning_selector -{ - typedef typename traits::type>::Scalar Scalar; - typedef typename ResultType::RealScalar RealScalar; - - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) - { - typename remove_all::type _res(res.rows(), res.cols()); - internal::sparse_sparse_product_with_pruning_impl(lhs, rhs, _res, tolerance); - res.swap(_res); - } -}; - -template -struct sparse_sparse_product_with_pruning_selector -{ - typedef typename ResultType::RealScalar RealScalar; - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) - { - // we need a col-major matrix to hold the result - typedef SparseMatrix SparseTemporaryType; - SparseTemporaryType _res(res.rows(), res.cols()); - internal::sparse_sparse_product_with_pruning_impl(lhs, rhs, _res, tolerance); - res = _res; - } -}; - -template -struct sparse_sparse_product_with_pruning_selector -{ - typedef typename ResultType::RealScalar RealScalar; - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) - { - // let's transpose the product to get a column x column product - typename remove_all::type _res(res.rows(), res.cols()); - 
internal::sparse_sparse_product_with_pruning_impl(rhs, lhs, _res, tolerance); - res.swap(_res); - } -}; - -template -struct sparse_sparse_product_with_pruning_selector -{ - typedef typename ResultType::RealScalar RealScalar; - static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) - { - typedef SparseMatrix ColMajorMatrixLhs; - typedef SparseMatrix ColMajorMatrixRhs; - ColMajorMatrixLhs colLhs(lhs); - ColMajorMatrixRhs colRhs(rhs); - internal::sparse_sparse_product_with_pruning_impl(colLhs, colRhs, res, tolerance); - - // let's transpose the product to get a column x column product -// typedef SparseMatrix SparseTemporaryType; -// SparseTemporaryType _res(res.cols(), res.rows()); -// sparse_sparse_product_with_pruning_impl(rhs, lhs, _res); -// res = _res.transpose(); - } -}; - -// NOTE the 2 others cases (col row *) must never occur since they are caught -// by ProductReturnType which transforms it to (col col *) by evaluating rhs. - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SPARSESPARSEPRODUCTWITHPRUNING_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTranspose.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTranspose.h index 76d031d52..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTranspose.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTranspose.h @@ -1,63 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSETRANSPOSE_H -#define EIGEN_SPARSETRANSPOSE_H - -namespace Eigen { - -template class TransposeImpl - : public SparseMatrixBase > -{ - typedef typename internal::remove_all::type _MatrixTypeNested; - public: - - EIGEN_SPARSE_PUBLIC_INTERFACE(Transpose ) - - class InnerIterator; - class ReverseInnerIterator; - - inline Index nonZeros() const { return derived().nestedExpression().nonZeros(); } -}; - -// NOTE: VC10 and VC11 trigger an ICE if don't put typename TransposeImpl:: in front of Index, -// a typedef typename TransposeImpl::Index Index; -// does not fix the issue. -// An alternative is to define the nested class in the parent class itself. -template class TransposeImpl::InnerIterator - : public _MatrixTypeNested::InnerIterator -{ - typedef typename _MatrixTypeNested::InnerIterator Base; - typedef typename TransposeImpl::Index Index; - public: - - EIGEN_STRONG_INLINE InnerIterator(const TransposeImpl& trans, typename TransposeImpl::Index outer) - : Base(trans.derived().nestedExpression(), outer) - {} - typename TransposeImpl::Index row() const { return Base::col(); } - typename TransposeImpl::Index col() const { return Base::row(); } -}; - -template class TransposeImpl::ReverseInnerIterator - : public _MatrixTypeNested::ReverseInnerIterator -{ - typedef typename _MatrixTypeNested::ReverseInnerIterator Base; - typedef typename TransposeImpl::Index Index; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const TransposeImpl& xpr, typename TransposeImpl::Index outer) - : Base(xpr.derived().nestedExpression(), outer) - {} - typename TransposeImpl::Index row() const { return Base::col(); } - typename TransposeImpl::Index col() const { return Base::row(); } -}; - -} // end namespace Eigen - -#endif // EIGEN_SPARSETRANSPOSE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTriangularView.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTriangularView.h index 333127b78..e69de29bb 100644 --- 
a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTriangularView.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseTriangularView.h @@ -1,179 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_TRIANGULARVIEW_H -#define EIGEN_SPARSE_TRIANGULARVIEW_H - -namespace Eigen { - -namespace internal { - -template -struct traits > -: public traits -{}; - -} // namespace internal - -template class SparseTriangularView - : public SparseMatrixBase > -{ - enum { SkipFirst = ((Mode&Lower) && !(MatrixType::Flags&RowMajorBit)) - || ((Mode&Upper) && (MatrixType::Flags&RowMajorBit)), - SkipLast = !SkipFirst, - SkipDiag = (Mode&ZeroDiag) ? 1 : 0, - HasUnitDiag = (Mode&UnitDiag) ? 
1 : 0 - }; - - public: - - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseTriangularView) - - class InnerIterator; - class ReverseInnerIterator; - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_reference::type MatrixTypeNestedNonRef; - typedef typename internal::remove_all::type MatrixTypeNestedCleaned; - - inline SparseTriangularView(const MatrixType& matrix) : m_matrix(matrix) {} - - /** \internal */ - inline const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } - - template - typename internal::plain_matrix_type_column_major::type - solve(const MatrixBase& other) const; - - template void solveInPlace(MatrixBase& other) const; - template void solveInPlace(SparseMatrixBase& other) const; - - protected: - MatrixTypeNested m_matrix; -}; - -template -class SparseTriangularView::InnerIterator : public MatrixTypeNestedCleaned::InnerIterator -{ - typedef typename MatrixTypeNestedCleaned::InnerIterator Base; - typedef typename SparseTriangularView::Index Index; - public: - - EIGEN_STRONG_INLINE InnerIterator(const SparseTriangularView& view, Index outer) - : Base(view.nestedExpression(), outer), m_returnOne(false) - { - if(SkipFirst) - { - while((*this) && ((HasUnitDiag||SkipDiag) ? this->index()<=outer : this->index()=Base::outer())) - { - if((!SkipFirst) && Base::operator bool()) - Base::operator++(); - m_returnOne = true; - } - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { - if(HasUnitDiag && m_returnOne) - m_returnOne = false; - else - { - Base::operator++(); - if(HasUnitDiag && (!SkipFirst) && ((!Base::operator bool()) || Base::index()>=Base::outer())) - { - if((!SkipFirst) && Base::operator bool()) - Base::operator++(); - m_returnOne = true; - } - } - return *this; - } - - inline Index row() const { return (MatrixType::Flags&RowMajorBit ? 
Base::outer() : this->index()); } - inline Index col() const { return (MatrixType::Flags&RowMajorBit ? this->index() : Base::outer()); } - inline Index index() const - { - if(HasUnitDiag && m_returnOne) return Base::outer(); - else return Base::index(); - } - inline Scalar value() const - { - if(HasUnitDiag && m_returnOne) return Scalar(1); - else return Base::value(); - } - - EIGEN_STRONG_INLINE operator bool() const - { - if(HasUnitDiag && m_returnOne) - return true; - if(SkipFirst) return Base::operator bool(); - else - { - if (SkipDiag) return (Base::operator bool() && this->index() < this->outer()); - else return (Base::operator bool() && this->index() <= this->outer()); - } - } - protected: - bool m_returnOne; -}; - -template -class SparseTriangularView::ReverseInnerIterator : public MatrixTypeNestedCleaned::ReverseInnerIterator -{ - typedef typename MatrixTypeNestedCleaned::ReverseInnerIterator Base; - typedef typename SparseTriangularView::Index Index; - public: - - EIGEN_STRONG_INLINE ReverseInnerIterator(const SparseTriangularView& view, Index outer) - : Base(view.nestedExpression(), outer) - { - eigen_assert((!HasUnitDiag) && "ReverseInnerIterator does not support yet triangular views with a unit diagonal"); - if(SkipLast) { - while((*this) && (SkipDiag ? 
this->index()>=outer : this->index()>outer)) - --(*this); - } - } - - EIGEN_STRONG_INLINE ReverseInnerIterator& operator--() - { Base::operator--(); return *this; } - - inline Index row() const { return Base::row(); } - inline Index col() const { return Base::col(); } - - EIGEN_STRONG_INLINE operator bool() const - { - if (SkipLast) return Base::operator bool() ; - else - { - if(SkipDiag) return (Base::operator bool() && this->index() > this->outer()); - else return (Base::operator bool() && this->index() >= this->outer()); - } - } -}; - -template -template -inline const SparseTriangularView -SparseMatrixBase::triangularView() const -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_TRIANGULARVIEW_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseUtil.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseUtil.h index d627546de..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseUtil.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseUtil.h @@ -1,172 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSEUTIL_H -#define EIGEN_SPARSEUTIL_H - -namespace Eigen { - -#ifdef NDEBUG -#define EIGEN_DBG_SPARSE(X) -#else -#define EIGEN_DBG_SPARSE(X) X -#endif - -#define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, Op) \ -template \ -EIGEN_STRONG_INLINE Derived& operator Op(const Eigen::SparseMatrixBase& other) \ -{ \ - return Base::operator Op(other.derived()); \ -} \ -EIGEN_STRONG_INLINE Derived& operator Op(const Derived& other) \ -{ \ - return Base::operator Op(other); \ -} - -#define EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, Op) \ -template \ -EIGEN_STRONG_INLINE Derived& operator Op(const Other& scalar) \ -{ \ - return Base::operator Op(scalar); \ -} - -#define EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATORS(Derived) \ -EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, =) \ -EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, +=) \ -EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(Derived, -=) \ -EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, *=) \ -EIGEN_SPARSE_INHERIT_SCALAR_ASSIGNMENT_OPERATOR(Derived, /=) - -#define _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived, BaseClass) \ - typedef BaseClass Base; \ - typedef typename Eigen::internal::traits::Scalar Scalar; \ - typedef typename Eigen::NumTraits::Real RealScalar; \ - typedef typename Eigen::internal::nested::type Nested; \ - typedef typename Eigen::internal::traits::StorageKind StorageKind; \ - typedef typename Eigen::internal::traits::Index Index; \ - enum { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ - ColsAtCompileTime = Eigen::internal::traits::ColsAtCompileTime, \ - Flags = Eigen::internal::traits::Flags, \ - CoeffReadCost = Eigen::internal::traits::CoeffReadCost, \ - SizeAtCompileTime = Base::SizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ - using Base::derived; \ - using Base::const_cast_derived; - -#define EIGEN_SPARSE_PUBLIC_INTERFACE(Derived) \ - _EIGEN_SPARSE_PUBLIC_INTERFACE(Derived, Eigen::SparseMatrixBase) - -const 
int CoherentAccessPattern = 0x1; -const int InnerRandomAccessPattern = 0x2 | CoherentAccessPattern; -const int OuterRandomAccessPattern = 0x4 | CoherentAccessPattern; -const int RandomAccessPattern = 0x8 | OuterRandomAccessPattern | InnerRandomAccessPattern; - -template class SparseMatrix; -template class DynamicSparseMatrix; -template class SparseVector; -template class MappedSparseMatrix; - -template class SparseTriangularView; -template class SparseSelfAdjointView; -template class SparseDiagonalProduct; -template class SparseView; - -template class SparseSparseProduct; -template class SparseTimeDenseProduct; -template class DenseTimeSparseProduct; -template class SparseDenseOuterProduct; - -template struct SparseSparseProductReturnType; -template::ColsAtCompileTime,internal::traits::RowsAtCompileTime)> struct DenseSparseProductReturnType; -template::ColsAtCompileTime,internal::traits::RowsAtCompileTime)> struct SparseDenseProductReturnType; -template class SparseSymmetricPermutationProduct; - -namespace internal { - -template struct sparse_eval; - -template struct eval - : public sparse_eval::RowsAtCompileTime,traits::ColsAtCompileTime> -{}; - -template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - typedef typename traits::Index _Index; - public: - typedef SparseVector<_Scalar, RowMajor, _Index> type; -}; - -template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - typedef typename traits::Index _Index; - public: - typedef SparseVector<_Scalar, ColMajor, _Index> type; -}; - -template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - typedef typename traits::Index _Index; - enum { _Options = ((traits::Flags&RowMajorBit)==RowMajorBit) ? 
RowMajor : ColMajor }; - public: - typedef SparseMatrix<_Scalar, _Options, _Index> type; -}; - -template struct sparse_eval { - typedef typename traits::Scalar _Scalar; - public: - typedef Matrix<_Scalar, 1, 1> type; -}; - -template struct plain_matrix_type -{ - typedef typename traits::Scalar _Scalar; - typedef typename traits::Index _Index; - enum { _Options = ((traits::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; - public: - typedef SparseMatrix<_Scalar, _Options, _Index> type; -}; - -} // end namespace internal - -/** \ingroup SparseCore_Module - * - * \class Triplet - * - * \brief A small structure to hold a non zero as a triplet (i,j,value). - * - * \sa SparseMatrix::setFromTriplets() - */ -template::Index > -class Triplet -{ -public: - Triplet() : m_row(0), m_col(0), m_value(0) {} - - Triplet(const Index& i, const Index& j, const Scalar& v = Scalar(0)) - : m_row(i), m_col(j), m_value(v) - {} - - /** \returns the row index of the element */ - const Index& row() const { return m_row; } - - /** \returns the column index of the element */ - const Index& col() const { return m_col; } - - /** \returns the value of the element */ - const Scalar& value() const { return m_value; } -protected: - Index m_row, m_col; - Scalar m_value; -}; - -} // end namespace Eigen - -#endif // EIGEN_SPARSEUTIL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseVector.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseVector.h index c7ee89cb8..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseVector.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseVector.h @@ -1,448 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSEVECTOR_H -#define EIGEN_SPARSEVECTOR_H - -namespace Eigen { - -/** \ingroup SparseCore_Module - * \class SparseVector - * - * \brief a sparse vector class - * - * \tparam _Scalar the scalar type, i.e. the type of the coefficients - * - * See http://www.netlib.org/linalg/html_templates/node91.html for details on the storage scheme. - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_SPARSEVECTOR_PLUGIN. - */ - -namespace internal { -template -struct traits > -{ - typedef _Scalar Scalar; - typedef _Index Index; - typedef Sparse StorageKind; - typedef MatrixXpr XprKind; - enum { - IsColVector = (_Options & RowMajorBit) ? 0 : 1, - - RowsAtCompileTime = IsColVector ? Dynamic : 1, - ColsAtCompileTime = IsColVector ? 1 : Dynamic, - MaxRowsAtCompileTime = RowsAtCompileTime, - MaxColsAtCompileTime = ColsAtCompileTime, - Flags = _Options | NestByRefBit | LvalueBit | (IsColVector ? 0 : RowMajorBit), - CoeffReadCost = NumTraits::ReadCost, - SupportedAccessPatterns = InnerRandomAccessPattern - }; -}; - -// Sparse-Vector-Assignment kinds: -enum { - SVA_RuntimeSwitch, - SVA_Inner, - SVA_Outer -}; - -template< typename Dest, typename Src, - int AssignmentKind = !bool(Src::IsVectorAtCompileTime) ? SVA_RuntimeSwitch - : Src::InnerSizeAtCompileTime==1 ? SVA_Outer - : SVA_Inner> -struct sparse_vector_assign_selector; - -} - -template -class SparseVector - : public SparseMatrixBase > -{ - typedef SparseMatrixBase SparseBase; - - public: - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseVector) - EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, +=) - EIGEN_SPARSE_INHERIT_ASSIGNMENT_OPERATOR(SparseVector, -=) - - typedef internal::CompressedStorage Storage; - enum { IsColVector = internal::traits::IsColVector }; - - enum { - Options = _Options - }; - - EIGEN_STRONG_INLINE Index rows() const { return IsColVector ? 
m_size : 1; } - EIGEN_STRONG_INLINE Index cols() const { return IsColVector ? 1 : m_size; } - EIGEN_STRONG_INLINE Index innerSize() const { return m_size; } - EIGEN_STRONG_INLINE Index outerSize() const { return 1; } - - EIGEN_STRONG_INLINE const Scalar* valuePtr() const { return m_data.valuePtr(); } - EIGEN_STRONG_INLINE Scalar* valuePtr() { return m_data.valuePtr(); } - - EIGEN_STRONG_INLINE const Index* innerIndexPtr() const { return m_data.indexPtr(); } - EIGEN_STRONG_INLINE Index* innerIndexPtr() { return m_data.indexPtr(); } - - /** \internal */ - inline Storage& data() { return m_data; } - /** \internal */ - inline const Storage& data() const { return m_data; } - - inline Scalar coeff(Index row, Index col) const - { - eigen_assert(IsColVector ? (col==0 && row>=0 && row=0 && col=0 && i=0 && row=0 && col=0 && i(m_data.size()); } - - inline void startVec(Index outer) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - inline Scalar& insertBackByOuterInner(Index outer, Index inner) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - return insertBack(inner); - } - inline Scalar& insertBack(Index i) - { - m_data.append(0, i); - return m_data.value(m_data.size()-1); - } - - inline Scalar& insert(Index row, Index col) - { - eigen_assert(IsColVector ? (col==0 && row>=0 && row=0 && col=0 && i= startId) && (m_data.index(p) > i) ) - { - m_data.index(p+1) = m_data.index(p); - m_data.value(p+1) = m_data.value(p); - --p; - } - m_data.index(p+1) = i; - m_data.value(p+1) = 0; - return m_data.value(p+1); - } - - /** - */ - inline void reserve(Index reserveSize) { m_data.reserve(reserveSize); } - - - inline void finalize() {} - - void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits::dummy_precision()) - { - m_data.prune(reference,epsilon); - } - - void resize(Index rows, Index cols) - { - eigen_assert(rows==1 || cols==1); - resize(IsColVector ? 
rows : cols); - } - - void resize(Index newSize) - { - m_size = newSize; - m_data.clear(); - } - - void resizeNonZeros(Index size) { m_data.resize(size); } - - inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); } - - inline SparseVector(Index size) : m_size(0) { check_template_parameters(); resize(size); } - - inline SparseVector(Index rows, Index cols) : m_size(0) { check_template_parameters(); resize(rows,cols); } - - template - inline SparseVector(const SparseMatrixBase& other) - : m_size(0) - { - check_template_parameters(); - *this = other.derived(); - } - - inline SparseVector(const SparseVector& other) - : SparseBase(other), m_size(0) - { - check_template_parameters(); - *this = other.derived(); - } - - /** Swaps the values of \c *this and \a other. - * Overloaded for performance: this version performs a \em shallow swap by swaping pointers and attributes only. - * \sa SparseMatrixBase::swap() - */ - inline void swap(SparseVector& other) - { - std::swap(m_size, other.m_size); - m_data.swap(other.m_data); - } - - inline SparseVector& operator=(const SparseVector& other) - { - if (other.isRValue()) - { - swap(other.const_cast_derived()); - } - else - { - resize(other.size()); - m_data = other.m_data; - } - return *this; - } - - template - inline SparseVector& operator=(const SparseMatrixBase& other) - { - SparseVector tmp(other.size()); - internal::sparse_vector_assign_selector::run(tmp,other.derived()); - this->swap(tmp); - return *this; - } - - #ifndef EIGEN_PARSED_BY_DOXYGEN - template - inline SparseVector& operator=(const SparseSparseProduct& product) - { - return Base::operator=(product); - } - #endif - - friend std::ostream & operator << (std::ostream & s, const SparseVector& m) - { - for (Index i=0; i::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); - EIGEN_STATIC_ASSERT((_Options&(ColMajor|RowMajor))==Options,INVALID_MATRIX_TEMPLATE_PARAMETERS); - } - - Storage m_data; - Index m_size; -}; - -template -class 
SparseVector::InnerIterator -{ - public: - InnerIterator(const SparseVector& vec, Index outer=0) - : m_data(vec.m_data), m_id(0), m_end(static_cast(m_data.size())) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - InnerIterator(const internal::CompressedStorage& data) - : m_data(data), m_id(0), m_end(static_cast(m_data.size())) - {} - - inline InnerIterator& operator++() { m_id++; return *this; } - - inline Scalar value() const { return m_data.value(m_id); } - inline Scalar& valueRef() { return const_cast(m_data.value(m_id)); } - - inline Index index() const { return m_data.index(m_id); } - inline Index row() const { return IsColVector ? index() : 0; } - inline Index col() const { return IsColVector ? 0 : index(); } - - inline operator bool() const { return (m_id < m_end); } - - protected: - const internal::CompressedStorage& m_data; - Index m_id; - const Index m_end; -}; - -template -class SparseVector::ReverseInnerIterator -{ - public: - ReverseInnerIterator(const SparseVector& vec, Index outer=0) - : m_data(vec.m_data), m_id(static_cast(m_data.size())), m_start(0) - { - EIGEN_UNUSED_VARIABLE(outer); - eigen_assert(outer==0); - } - - ReverseInnerIterator(const internal::CompressedStorage& data) - : m_data(data), m_id(static_cast(m_data.size())), m_start(0) - {} - - inline ReverseInnerIterator& operator--() { m_id--; return *this; } - - inline Scalar value() const { return m_data.value(m_id-1); } - inline Scalar& valueRef() { return const_cast(m_data.value(m_id-1)); } - - inline Index index() const { return m_data.index(m_id-1); } - inline Index row() const { return IsColVector ? index() : 0; } - inline Index col() const { return IsColVector ? 
0 : index(); } - - inline operator bool() const { return (m_id > m_start); } - - protected: - const internal::CompressedStorage& m_data; - Index m_id; - const Index m_start; -}; - -namespace internal { - -template< typename Dest, typename Src> -struct sparse_vector_assign_selector { - static void run(Dest& dst, const Src& src) { - eigen_internal_assert(src.innerSize()==src.size()); - for(typename Src::InnerIterator it(src, 0); it; ++it) - dst.insert(it.index()) = it.value(); - } -}; - -template< typename Dest, typename Src> -struct sparse_vector_assign_selector { - static void run(Dest& dst, const Src& src) { - eigen_internal_assert(src.outerSize()==src.size()); - for(typename Dest::Index i=0; i -struct sparse_vector_assign_selector { - static void run(Dest& dst, const Src& src) { - if(src.outerSize()==1) sparse_vector_assign_selector::run(dst, src); - else sparse_vector_assign_selector::run(dst, src); - } -}; - -} - -} // end namespace Eigen - -#endif // EIGEN_SPARSEVECTOR_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseView.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseView.h index 2820b39b8..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseView.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/SparseView.h @@ -1,99 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2011 Gael Guennebaud -// Copyright (C) 2010 Daniel Lowengrub -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSEVIEW_H -#define EIGEN_SPARSEVIEW_H - -namespace Eigen { - -namespace internal { - -template -struct traits > : traits -{ - typedef typename MatrixType::Index Index; - typedef Sparse StorageKind; - enum { - Flags = int(traits::Flags) & (RowMajorBit) - }; -}; - -} // end namespace internal - -template -class SparseView : public SparseMatrixBase > -{ - typedef typename MatrixType::Nested MatrixTypeNested; - typedef typename internal::remove_all::type _MatrixTypeNested; -public: - EIGEN_SPARSE_PUBLIC_INTERFACE(SparseView) - - explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0), - const RealScalar &epsilon = NumTraits::dummy_precision()) - : m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {} - - class InnerIterator; - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - inline Index innerSize() const { return m_matrix.innerSize(); } - inline Index outerSize() const { return m_matrix.outerSize(); } - -protected: - MatrixTypeNested m_matrix; - Scalar m_reference; - typename NumTraits::Real m_epsilon; -}; - -template -class SparseView::InnerIterator : public _MatrixTypeNested::InnerIterator -{ - typedef typename SparseView::Index Index; -public: - typedef typename _MatrixTypeNested::InnerIterator IterBase; - InnerIterator(const SparseView& view, Index outer) : - IterBase(view.m_matrix, outer), m_view(view) - { - incrementToNonZero(); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() - { - IterBase::operator++(); - incrementToNonZero(); - return *this; - } - - using IterBase::value; - -protected: - const SparseView& m_view; - -private: - void incrementToNonZero() - { - while((bool(*this)) && internal::isMuchSmallerThan(value(), m_view.m_reference, m_view.m_epsilon)) - { - IterBase::operator++(); - } - } -}; - -template -const SparseView MatrixBase::sparseView(const Scalar& m_reference, - const typename NumTraits::Real& m_epsilon) const -{ - return 
SparseView(derived(), m_reference, m_epsilon); -} - -} // end namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/TriangularSolver.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/TriangularSolver.h index ccc12af79..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/TriangularSolver.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseCore/TriangularSolver.h @@ -1,334 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSETRIANGULARSOLVER_H -#define EIGEN_SPARSETRIANGULARSOLVER_H - -namespace Eigen { - -namespace internal { - -template::Flags) & RowMajorBit> -struct sparse_solve_triangular_selector; - -// forward substitution, row-major -template -struct sparse_solve_triangular_selector -{ - typedef typename Rhs::Scalar Scalar; - static void run(const Lhs& lhs, Rhs& other) - { - for(int col=0 ; col -struct sparse_solve_triangular_selector -{ - typedef typename Rhs::Scalar Scalar; - static void run(const Lhs& lhs, Rhs& other) - { - for(int col=0 ; col=0 ; --i) - { - Scalar tmp = other.coeff(i,col); - Scalar l_ii(0); - typename Lhs::InnerIterator it(lhs, i); - while(it && it.index() -struct sparse_solve_triangular_selector -{ - typedef typename Rhs::Scalar Scalar; - static void run(const Lhs& lhs, Rhs& other) - { - for(int col=0 ; col -struct sparse_solve_triangular_selector -{ - typedef typename Rhs::Scalar Scalar; - static void run(const Lhs& lhs, Rhs& other) - { - for(int col=0 ; col=0; --i) - { - Scalar& tmp = other.coeffRef(i,col); - if (tmp!=Scalar(0)) // optimization when other is actually sparse - { - if(!(Mode & UnitDiag)) - { - // TODO replace this by a binary search. 
make sure the binary search is safe for partially sorted elements - typename Lhs::ReverseInnerIterator it(lhs, i); - while(it && it.index()!=i) - --it; - eigen_assert(it && it.index()==i); - other.coeffRef(i,col) /= it.value(); - } - typename Lhs::InnerIterator it(lhs, i); - for(; it && it.index() -template -void SparseTriangularView::solveInPlace(MatrixBase& other) const -{ - eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows()); - eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); - - enum { copy = internal::traits::Flags & RowMajorBit }; - - typedef typename internal::conditional::type, OtherDerived&>::type OtherCopy; - OtherCopy otherCopy(other.derived()); - - internal::sparse_solve_triangular_selector::type, Mode>::run(m_matrix, otherCopy); - - if (copy) - other = otherCopy; -} - -template -template -typename internal::plain_matrix_type_column_major::type -SparseTriangularView::solve(const MatrixBase& other) const -{ - typename internal::plain_matrix_type_column_major::type res(other); - solveInPlace(res); - return res; -} - -// pure sparse path - -namespace internal { - -template -struct sparse_solve_triangular_sparse_selector; - -// forward substitution, col-major -template -struct sparse_solve_triangular_sparse_selector -{ - typedef typename Rhs::Scalar Scalar; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - static void run(const Lhs& lhs, Rhs& other) - { - const bool IsLower = (UpLo==Lower); - AmbiVector tempVector(other.rows()*2); - tempVector.setBounds(0,other.rows()); - - Rhs res(other.rows(), other.cols()); - res.reserve(other.nonZeros()); - - for(int col=0 ; col=0; - i+=IsLower?1:-1) - { - tempVector.restart(); - Scalar& ci = tempVector.coeffRef(i); - if (ci!=Scalar(0)) - { - // find - typename Lhs::InnerIterator it(lhs, i); - if(!(Mode & UnitDiag)) - { - if (IsLower) - { - eigen_assert(it.index()==i); - ci /= it.value(); - } - else - ci /= lhs.coeff(i,i); - } - 
tempVector.restart(); - if (IsLower) - { - if (it.index()==i) - ++it; - for(; it; ++it) - tempVector.coeffRef(it.index()) -= ci * it.value(); - } - else - { - for(; it && it.index()::Iterator it(tempVector/*,1e-12*/); it; ++it) - { - ++ count; -// std::cerr << "fill " << it.index() << ", " << col << "\n"; -// std::cout << it.value() << " "; - // FIXME use insertBack - res.insert(it.index(), col) = it.value(); - } -// std::cout << "tempVector.nonZeros() == " << int(count) << " / " << (other.rows()) << "\n"; - } - res.finalize(); - other = res.markAsRValue(); - } -}; - -} // end namespace internal - -template -template -void SparseTriangularView::solveInPlace(SparseMatrixBase& other) const -{ - eigen_assert(m_matrix.cols() == m_matrix.rows() && m_matrix.cols() == other.rows()); - eigen_assert( (!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); - -// enum { copy = internal::traits::Flags & RowMajorBit }; - -// typedef typename internal::conditional::type, OtherDerived&>::type OtherCopy; -// OtherCopy otherCopy(other.derived()); - - internal::sparse_solve_triangular_sparse_selector::run(m_matrix, other.derived()); - -// if (copy) -// other = otherCopy; -} - -#ifdef EIGEN2_SUPPORT - -// deprecated stuff: - -/** \deprecated */ -template -template -void SparseMatrixBase::solveTriangularInPlace(MatrixBase& other) const -{ - this->template triangular().solveInPlace(other); -} - -/** \deprecated */ -template -template -typename internal::plain_matrix_type_column_major::type -SparseMatrixBase::solveTriangular(const MatrixBase& other) const -{ - typename internal::plain_matrix_type_column_major::type res(other); - derived().solveTriangularInPlace(res); - return res; -} -#endif // EIGEN2_SUPPORT - -} // end namespace Eigen - -#endif // EIGEN_SPARSETRIANGULARSOLVER_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/CMakeLists.txt index 69729ee89..e69de29bb 100644 --- 
a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SparseLU_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SparseLU_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU.h index bdc4f193d..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU.h @@ -1,806 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_SPARSE_LU_H -#define EIGEN_SPARSE_LU_H - -namespace Eigen { - -template > class SparseLU; -template struct SparseLUMatrixLReturnType; -template struct SparseLUMatrixUReturnType; - -/** \ingroup SparseLU_Module - * \class SparseLU - * - * \brief Sparse supernodal LU factorization for general matrices - * - * This class implements the supernodal LU factorization for general matrices. - * It uses the main techniques from the sequential SuperLU package - * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real - * and complex arithmetics with single and double precision, depending on the - * scalar type of your input matrix. - * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. - * It benefits directly from the built-in high-performant Eigen BLAS routines. - * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to - * enable a better optimization from the compiler. 
For best performance, - * you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors. - * - * An important parameter of this class is the ordering method. It is used to reorder the columns - * (and eventually the rows) of the matrix to reduce the number of new elements that are created during - * numerical factorization. The cheapest method available is COLAMD. - * See \link OrderingMethods_Module the OrderingMethods module \endlink for the list of - * built-in and external ordering methods. - * - * Simple example with key steps - * \code - * VectorXd x(n), b(n); - * SparseMatrix A; - * SparseLU, COLAMDOrdering > solver; - * // fill A and b; - * // Compute the ordering permutation vector from the structural pattern of A - * solver.analyzePattern(A); - * // Compute the numerical factorization - * solver.factorize(A); - * //Use the factors to solve the linear system - * x = solver.solve(b); - * \endcode - * - * \warning The input matrix A should be in a \b compressed and \b column-major form. - * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. - * - * \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix. - * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization. - * If this is the case for your matrices, you can try the basic scaling method at - * "unsupported/Eigen/src/IterativeSolvers/Scaling.h" - * - * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> - * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. 
Default is COLMAD - * - * - * \sa \ref TutorialSparseDirectSolvers - * \sa \ref OrderingMethods_Module - */ -template -class SparseLU : public internal::SparseLUImpl -{ - public: - typedef _MatrixType MatrixType; - typedef _OrderingType OrderingType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix NCMatrix; - typedef internal::MappedSuperNodalMatrix SCMatrix; - typedef Matrix ScalarVector; - typedef Matrix IndexVector; - typedef PermutationMatrix PermutationType; - typedef internal::SparseLUImpl Base; - - public: - SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) - { - initperfvalues(); - } - SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) - { - initperfvalues(); - compute(matrix); - } - - ~SparseLU() - { - // Free all explicit dynamic pointers - } - - void analyzePattern (const MatrixType& matrix); - void factorize (const MatrixType& matrix); - void simplicialfactorize(const MatrixType& matrix); - - /** - * Compute the symbolic and numeric factorization of the input sparse matrix. - * The input matrix should be in column-major storage. 
- */ - void compute (const MatrixType& matrix) - { - // Analyze - analyzePattern(matrix); - //Factorize - factorize(matrix); - } - - inline Index rows() const { return m_mat.rows(); } - inline Index cols() const { return m_mat.cols(); } - /** Indicate that the pattern of the input matrix is symmetric */ - void isSymmetric(bool sym) - { - m_symmetricmode = sym; - } - - /** \returns an expression of the matrix L, internally stored as supernodes - * The only operation available with this expression is the triangular solve - * \code - * y = b; matrixL().solveInPlace(y); - * \endcode - */ - SparseLUMatrixLReturnType matrixL() const - { - return SparseLUMatrixLReturnType(m_Lstore); - } - /** \returns an expression of the matrix U, - * The only operation available with this expression is the triangular solve - * \code - * y = b; matrixU().solveInPlace(y); - * \endcode - */ - SparseLUMatrixUReturnType > matrixU() const - { - return SparseLUMatrixUReturnType >(m_Lstore, m_Ustore); - } - - /** - * \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$ - * \sa colsPermutation() - */ - inline const PermutationType& rowsPermutation() const - { - return m_perm_r; - } - /** - * \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$ - * \sa rowsPermutation() - */ - inline const PermutationType& colsPermutation() const - { - return m_perm_c; - } - /** Set the threshold used for a diagonal entry to be an acceptable pivot. */ - void setPivotThreshold(const RealScalar& thresh) - { - m_diagpivotthresh = thresh; - } - - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. - * - * \warning the destination matrix X in X = this->solve(B) must be colmun-major. 
- * - * \sa compute() - */ - template - inline const internal::solve_retval solve(const MatrixBase& B) const - { - eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); - eigen_assert(rows()==B.rows() - && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); - return internal::solve_retval(*this, B.derived()); - } - - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval solve(const SparseMatrixBase& B) const - { - eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); - eigen_assert(rows()==B.rows() - && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); - return internal::sparse_solve_retval(*this, B.derived()); - } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance - * \c InvalidInput if the input matrix is invalid - * - * \sa iparm() - */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - /** - * \returns A string describing the type of error - */ - std::string lastErrorMessage() const - { - return m_lastError; - } - - template - bool _solve(const MatrixBase &B, MatrixBase &X_base) const - { - Dest& X(X_base.derived()); - eigen_assert(m_factorizationIsOk && "The matrix should be factorized first"); - EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - - // Permute the right hand side to form X = Pr*B - // on return, X is overwritten by the computed solution - X.resize(B.rows(),B.cols()); - - // this ugly const_cast_derived() helps to detect aliasing when applying the permutations - for(Index j = 0; j < B.cols(); ++j) - X.col(j) = rowsPermutation() * 
B.const_cast_derived().col(j); - - //Forward substitution with L - this->matrixL().solveInPlace(X); - this->matrixU().solveInPlace(X); - - // Permute back the solution - for (Index j = 0; j < B.cols(); ++j) - X.col(j) = colsPermutation().inverse() * X.col(j); - - return true; - } - - /** - * \returns the absolute value of the determinant of the matrix of which - * *this is the QR decomposition. - * - * \warning a determinant can be very big or small, so for matrices - * of large enough dimension, there is a risk of overflow/underflow. - * One way to work around that is to use logAbsDeterminant() instead. - * - * \sa logAbsDeterminant(), signDeterminant() - */ - Scalar absDeterminant() - { - eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); - // Initialize with the determinant of the row matrix - Scalar det = Scalar(1.); - // Note that the diagonal blocks of U are stored in supernodes, - // which are available in the L part :) - for (Index j = 0; j < this->cols(); ++j) - { - for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) - { - if(it.index() == j) - { - using std::abs; - det *= abs(it.value()); - break; - } - } - } - return det; - } - - /** \returns the natural log of the absolute value of the determinant of the matrix - * of which **this is the QR decomposition - * - * \note This method is useful to work around the risk of overflow/underflow that's - * inherent to the determinant computation. 
- * - * \sa absDeterminant(), signDeterminant() - */ - Scalar logAbsDeterminant() const - { - eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); - Scalar det = Scalar(0.); - for (Index j = 0; j < this->cols(); ++j) - { - for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) - { - if(it.row() < j) continue; - if(it.row() == j) - { - using std::log; using std::abs; - det += log(abs(it.value())); - break; - } - } - } - return det; - } - - /** \returns A number representing the sign of the determinant - * - * \sa absDeterminant(), logAbsDeterminant() - */ - Scalar signDeterminant() - { - eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); - // Initialize with the determinant of the row matrix - Index det = 1; - // Note that the diagonal blocks of U are stored in supernodes, - // which are available in the L part :) - for (Index j = 0; j < this->cols(); ++j) - { - for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) - { - if(it.index() == j) - { - if(it.value()<0) - det = -det; - else if(it.value()==0) - return 0; - break; - } - } - } - return det * m_detPermR * m_detPermC; - } - - /** \returns The determinant of the matrix. 
- * - * \sa absDeterminant(), logAbsDeterminant() - */ - Scalar determinant() - { - eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); - // Initialize with the determinant of the row matrix - Scalar det = Scalar(1.); - // Note that the diagonal blocks of U are stored in supernodes, - // which are available in the L part :) - for (Index j = 0; j < this->cols(); ++j) - { - for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) - { - if(it.index() == j) - { - det *= it.value(); - break; - } - } - } - return det * Scalar(m_detPermR * m_detPermC); - } - - protected: - // Functions - void initperfvalues() - { - m_perfv.panel_size = 16; - m_perfv.relax = 1; - m_perfv.maxsuper = 128; - m_perfv.rowblk = 16; - m_perfv.colblk = 8; - m_perfv.fillfactor = 20; - } - - // Variables - mutable ComputationInfo m_info; - bool m_isInitialized; - bool m_factorizationIsOk; - bool m_analysisIsOk; - std::string m_lastError; - NCMatrix m_mat; // The input (permuted ) matrix - SCMatrix m_Lstore; // The lower triangular matrix (supernodal) - MappedSparseMatrix m_Ustore; // The upper triangular matrix - PermutationType m_perm_c; // Column permutation - PermutationType m_perm_r ; // Row permutation - IndexVector m_etree; // Column elimination tree - - typename Base::GlobalLU_t m_glu; - - // SparseLU options - bool m_symmetricmode; - // values for performance - internal::perfvalues m_perfv; - RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot - Index m_nnzL, m_nnzU; // Nonzeros in L and U factors - Index m_detPermR, m_detPermC; // Determinants of the permutation matrices - private: - // Disable copy constructor - SparseLU (const SparseLU& ); - -}; // End class SparseLU - - - -// Functions needed by the anaysis phase -/** - * Compute the column permutation to minimize the fill-in - * - * - Apply this permutation to the input matrix - - * - * - Compute the column elimination tree on the permuted matrix 
- * - * - Postorder the elimination tree and the column permutation - * - */ -template -void SparseLU::analyzePattern(const MatrixType& mat) -{ - - //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. - - OrderingType ord; - ord(mat,m_perm_c); - - // Apply the permutation to the column of the input matrix - //First copy the whole input matrix. - m_mat = mat; - if (m_perm_c.size()) { - m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. - //Then, permute only the column pointers - const Index * outerIndexPtr; - if (mat.isCompressed()) outerIndexPtr = mat.outerIndexPtr(); - else - { - Index *outerIndexPtr_t = new Index[mat.cols()+1]; - for(Index i = 0; i <= mat.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i]; - outerIndexPtr = outerIndexPtr_t; - } - for (Index i = 0; i < mat.cols(); i++) - { - m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i]; - m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i]; - } - if(!mat.isCompressed()) delete[] outerIndexPtr; - } - // Compute the column elimination tree of the permuted matrix - IndexVector firstRowElt; - internal::coletree(m_mat, m_etree,firstRowElt); - - // In symmetric mode, do not do postorder here - if (!m_symmetricmode) { - IndexVector post, iwork; - // Post order etree - internal::treePostorder(m_mat.cols(), m_etree, post); - - - // Renumber etree in postorder - Index m = m_mat.cols(); - iwork.resize(m+1); - for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i)); - m_etree = iwork; - - // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree - PermutationType post_perm(m); - for (Index i = 0; i < m; i++) - post_perm.indices()(i) = post(i); - - // Combine the two permutations : postorder the permutation for future use - if(m_perm_c.size()) { - m_perm_c = 
post_perm * m_perm_c; - } - - } // end postordering - - m_analysisIsOk = true; -} - -// Functions needed by the numerical factorization phase - - -/** - * - Numerical factorization - * - Interleaved with the symbolic factorization - * On exit, info is - * - * = 0: successful factorization - * - * > 0: if info = i, and i is - * - * <= A->ncol: U(i,i) is exactly zero. The factorization has - * been completed, but the factor U is exactly singular, - * and division by zero will occur if it is used to solve a - * system of equations. - * - * > A->ncol: number of bytes allocated when memory allocation - * failure occurred, plus A->ncol. If lwork = -1, it is - * the estimated amount of space needed, plus A->ncol. - */ -template -void SparseLU::factorize(const MatrixType& matrix) -{ - using internal::emptyIdxLU; - eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); - eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); - - typedef typename IndexVector::Scalar Index; - - - // Apply the column permutation computed in analyzepattern() - // m_mat = matrix * m_perm_c.inverse(); - m_mat = matrix; - if (m_perm_c.size()) - { - m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. 
- //Then, permute only the column pointers - const Index * outerIndexPtr; - if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr(); - else - { - Index* outerIndexPtr_t = new Index[matrix.cols()+1]; - for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i]; - outerIndexPtr = outerIndexPtr_t; - } - for (Index i = 0; i < matrix.cols(); i++) - { - m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i]; - m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i]; - } - if(!matrix.isCompressed()) delete[] outerIndexPtr; - } - else - { //FIXME This should not be needed if the empty permutation is handled transparently - m_perm_c.resize(matrix.cols()); - for(Index i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i; - } - - Index m = m_mat.rows(); - Index n = m_mat.cols(); - Index nnz = m_mat.nonZeros(); - Index maxpanel = m_perfv.panel_size * m; - // Allocate working storage common to the factor routines - Index lwork = 0; - Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); - if (info) - { - m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; - m_factorizationIsOk = false; - return ; - } - - // Set up pointers for integer working arrays - IndexVector segrep(m); segrep.setZero(); - IndexVector parent(m); parent.setZero(); - IndexVector xplore(m); xplore.setZero(); - IndexVector repfnz(maxpanel); - IndexVector panel_lsub(maxpanel); - IndexVector xprune(n); xprune.setZero(); - IndexVector marker(m*internal::LUNoMarker); marker.setZero(); - - repfnz.setConstant(-1); - panel_lsub.setConstant(-1); - - // Set up pointers for scalar working arrays - ScalarVector dense; - dense.setZero(maxpanel); - ScalarVector tempv; - tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) ); - - // Compute the inverse of perm_c - PermutationType iperm_c(m_perm_c.inverse()); - - // Identify initial relaxed snodes - 
IndexVector relax_end(n); - if ( m_symmetricmode == true ) - Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); - else - Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); - - - m_perm_r.resize(m); - m_perm_r.indices().setConstant(-1); - marker.setConstant(-1); - m_detPermR = 1; // Record the determinant of the row permutation - - m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0); - m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0); - - // Work on one 'panel' at a time. A panel is one of the following : - // (a) a relaxed supernode at the bottom of the etree, or - // (b) panel_size contiguous columns, defined by the user - Index jcol; - IndexVector panel_histo(n); - Index pivrow; // Pivotal row number in the original row matrix - Index nseg1; // Number of segments in U-column above panel row jcol - Index nseg; // Number of segments in each U-column - Index irep; - Index i, k, jj; - for (jcol = 0; jcol < n; ) - { - // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
- Index panel_size = m_perfv.panel_size; // upper bound on panel width - for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++) - { - if (relax_end(k) != emptyIdxLU) - { - panel_size = k - jcol; - break; - } - } - if (k == n) - panel_size = n - jcol; - - // Symbolic outer factorization on a panel of columns - Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); - - // Numeric sup-panel updates in topological order - Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); - - // Sparse LU within the panel, and below the panel diagonal - for ( jj = jcol; jj< jcol + panel_size; jj++) - { - k = (jj - jcol) * m; // Column index for w-wide arrays - - nseg = nseg1; // begin after all the panel segments - //Depth-first-search for the current column - VectorBlock panel_lsubk(panel_lsub, k, m); - VectorBlock repfnz_k(repfnz, k, m); - info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); - if ( info ) - { - m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() "; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - // Numeric updates to this column - VectorBlock dense_k(dense, k, m); - VectorBlock segrep_k(segrep, nseg1, m-nseg1); - info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); - if ( info ) - { - m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() "; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - - // Copy the U-segments to ucol(*) - info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); - if ( info ) - { - m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() "; - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - - // Form the L-segment - info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), 
iperm_c.indices(), pivrow, m_glu); - if ( info ) - { - m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT "; - std::ostringstream returnInfo; - returnInfo << info; - m_lastError += returnInfo.str(); - m_info = NumericalIssue; - m_factorizationIsOk = false; - return; - } - - // Update the determinant of the row permutation matrix - // FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot. - if (pivrow != jj) m_detPermR = -m_detPermR; - - // Prune columns (0:jj-1) using column jj - Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); - - // Reset repfnz for this column - for (i = 0; i < nseg; i++) - { - irep = segrep(i); - repfnz_k(irep) = emptyIdxLU; - } - } // end SparseLU within the panel - jcol += panel_size; // Move to the next panel - } // end for -- end elimination - - m_detPermR = m_perm_r.determinant(); - m_detPermC = m_perm_c.determinant(); - - // Count the number of nonzeros in factors - Base::countnz(n, m_nnzL, m_nnzU, m_glu); - // Apply permutation to the L subscripts - Base::fixupL(n, m_perm_r.indices(), m_glu); - - // Create supernode matrix L - m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); - // Create the column major upper sparse matrix U; - new (&m_Ustore) MappedSparseMatrix ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); - - m_info = Success; - m_factorizationIsOk = true; -} - -template -struct SparseLUMatrixLReturnType : internal::no_assignment_operator -{ - typedef typename MappedSupernodalType::Index Index; - typedef typename MappedSupernodalType::Scalar Scalar; - SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL) - { } - Index rows() { return m_mapL.rows(); } - Index cols() { return m_mapL.cols(); } - template - void solveInPlace( MatrixBase &X) const - { - m_mapL.solveInPlace(X); - } - const MappedSupernodalType& 
m_mapL; -}; - -template -struct SparseLUMatrixUReturnType : internal::no_assignment_operator -{ - typedef typename MatrixLType::Index Index; - typedef typename MatrixLType::Scalar Scalar; - SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) - : m_mapL(mapL),m_mapU(mapU) - { } - Index rows() { return m_mapL.rows(); } - Index cols() { return m_mapL.cols(); } - - template void solveInPlace(MatrixBase &X) const - { - Index nrhs = X.cols(); - Index n = X.rows(); - // Backward solve with U - for (Index k = m_mapL.nsuper(); k >= 0; k--) - { - Index fsupc = m_mapL.supToCol()[k]; - Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension - Index nsupc = m_mapL.supToCol()[k+1] - fsupc; - Index luptr = m_mapL.colIndexPtr()[fsupc]; - - if (nsupc == 1) - { - for (Index j = 0; j < nrhs; j++) - { - X(fsupc, j) /= m_mapL.valuePtr()[luptr]; - } - } - else - { - Map, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); - U = A.template triangularView().solve(U); - } - - for (Index j = 0; j < nrhs; ++j) - { - for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++) - { - typename MatrixUType::InnerIterator it(m_mapU, jcol); - for ( ; it; ++it) - { - Index irow = it.index(); - X(irow, j) -= X(jcol, j) * it.value(); - } - } - } - } // End For U-solve - } - const MatrixLType& m_mapL; - const MatrixUType& m_mapU; -}; - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef SparseLU<_MatrixType,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef SparseLU<_MatrixType,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - 
} -}; -} // end namespace internal - -} // End namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLUImpl.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLUImpl.h index 99d651e40..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLUImpl.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLUImpl.h @@ -1,66 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef SPARSELU_IMPL_H -#define SPARSELU_IMPL_H - -namespace Eigen { -namespace internal { - -/** \ingroup SparseLU_Module - * \class SparseLUImpl - * Base class for sparseLU - */ -template -class SparseLUImpl -{ - public: - typedef Matrix ScalarVector; - typedef Matrix ScalarMatrix; - typedef Map > MappedMatrixBlock; - typedef Matrix IndexVector; - typedef typename ScalarVector::RealScalar RealScalar; - typedef Ref > BlockScalarVector; - typedef Ref > BlockIndexVector; - typedef LU_GlobalLU_t GlobalLU_t; - typedef SparseMatrix MatrixType; - - protected: - template - Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions); - Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu); - template - Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions); - void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end); - void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end); - Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, 
IndexVector& marker, GlobalLU_t& glu); - Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu); - Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu); - template - void dfs_kernel(const Index jj, IndexVector& perm_r, - Index& nseg, IndexVector& panel_lsub, IndexVector& segrep, - Ref repfnz_col, IndexVector& xprune, Ref marker, IndexVector& parent, - IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits); - void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); - - void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu); - Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); - Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu); - Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu); - void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu); - void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu); - void fixupL(const Index n, const IndexVector& 
perm_r, GlobalLU_t& glu); - - template - friend struct column_dfs_traits; -}; - -} // end namespace internal -} // namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Memory.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Memory.h index 45f96d16a..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Memory.h @@ -1,227 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU - - * -- SuperLU routine (version 3.1) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * August 1, 2008 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. 
- */ - -#ifndef EIGEN_SPARSELU_MEMORY -#define EIGEN_SPARSELU_MEMORY - -namespace Eigen { -namespace internal { - -enum { LUNoMarker = 3 }; -enum {emptyIdxLU = -1}; -template -inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b) -{ - return (std::max)(m, (t+b)*w); -} - -template< typename Scalar, typename Index> -inline Index LUTempSpace(Index&m, Index& w) -{ - return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar); -} - - - - -/** - * Expand the existing storage to accomodate more fill-ins - * \param vec Valid pointer to the vector to allocate or expand - * \param[in,out] length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector - * \param[in] nbElts Current number of elements in the factors - * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand - * \param[in,out] num_expansions Number of times the memory has been expanded - */ -template -template -Index SparseLUImpl::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions) -{ - - float alpha = 1.5; // Ratio of the memory increase - Index new_len; // New size of the allocated memory - - if(num_expansions == 0 || keep_prev) - new_len = length ; // First time allocate requested - else - new_len = (std::max)(length+1,Index(alpha * length)); - - VectorType old_vec; // Temporary vector to hold the previous values - if (nbElts > 0 ) - old_vec = vec.segment(0,nbElts); - - //Allocate or expand the current vector -#ifdef EIGEN_EXCEPTIONS - try -#endif - { - vec.resize(new_len); - } -#ifdef EIGEN_EXCEPTIONS - catch(std::bad_alloc& ) -#else - if(!vec.size()) -#endif - { - if (!num_expansions) - { - // First time to allocate from LUMemInit() - // Let LUMemInit() deals with it. 
- return -1; - } - if (keep_prev) - { - // In this case, the memory length should not not be reduced - return new_len; - } - else - { - // Reduce the size and increase again - Index tries = 0; // Number of attempts - do - { - alpha = (alpha + 1)/2; - new_len = (std::max)(length+1,Index(alpha * length)); -#ifdef EIGEN_EXCEPTIONS - try -#endif - { - vec.resize(new_len); - } -#ifdef EIGEN_EXCEPTIONS - catch(std::bad_alloc& ) -#else - if (!vec.size()) -#endif - { - tries += 1; - if ( tries > 10) return new_len; - } - } while (!vec.size()); - } - } - //Copy the previous values to the newly allocated space - if (nbElts > 0) - vec.segment(0, nbElts) = old_vec; - - - length = new_len; - if(num_expansions) ++num_expansions; - return 0; -} - -/** - * \brief Allocate various working space for the numerical factorization phase. - * \param m number of rows of the input matrix - * \param n number of columns - * \param annz number of initial nonzeros in the matrix - * \param lwork if lwork=-1, this routine returns an estimated size of the required memory - * \param glu persistent data to facilitate multiple factors : will be deleted later ?? 
- * \param fillratio estimated ratio of fill in the factors - * \param panel_size Size of a panel - * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success - * \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation - */ -template -Index SparseLUImpl::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu) -{ - Index& num_expansions = glu.num_expansions; //No memory expansions so far - num_expansions = 0; - glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U - glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor - // Return the estimated size to the user if necessary - Index tempSpace; - tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar); - if (lwork == emptyIdxLU) - { - Index estimated_size; - estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace - + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n; - return estimated_size; - } - - // Setup the required space - - // First allocate Integer pointers for L\U factors - glu.xsup.resize(n+1); - glu.supno.resize(n+1); - glu.xlsub.resize(n+1); - glu.xlusup.resize(n+1); - glu.xusub.resize(n+1); - - // Reserve memory for L/U factors - do - { - if( (expand(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0) - || (expand(glu.ucol, glu.nzumax, 0, 0, num_expansions)<0) - || (expand (glu.lsub, glu.nzlmax, 0, 0, num_expansions)<0) - || (expand (glu.usub, glu.nzumax, 0, 1, num_expansions)<0) ) - { - //Reduce the estimated size and retry - glu.nzlumax /= 2; - glu.nzumax /= 2; - glu.nzlmax /= 2; - if (glu.nzlumax < annz ) return glu.nzlumax; - } - } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || 
!glu.usub.size()); - - ++num_expansions; - return 0; - -} // end LuMemInit - -/** - * \brief Expand the existing storage - * \param vec vector to expand - * \param[in,out] maxlen On input, previous size of vec (Number of elements to copy ). on output, new size - * \param nbElts current number of elements in the vector. - * \param memtype Type of the element to expand - * \param num_expansions Number of expansions - * \return 0 on success, > 0 size of the memory allocated so far - */ -template -template -Index SparseLUImpl::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions) -{ - Index failed_size; - if (memtype == USUB) - failed_size = this->expand(vec, maxlen, nbElts, 1, num_expansions); - else - failed_size = this->expand(vec, maxlen, nbElts, 0, num_expansions); - - if (failed_size) - return failed_size; - - return 0 ; -} - -} // end namespace internal - -} // end namespace Eigen -#endif // EIGEN_SPARSELU_MEMORY diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Structs.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Structs.h index 24d6bf179..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Structs.h @@ -1,111 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h - * -- SuperLU routine (version 4.1) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. 
- * November, 2010 - * - * Global data structures used in LU factorization - - * - * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. - * (xsup,supno): supno[i] is the supernode no to which i belongs; - * xsup(s) points to the beginning of the s-th supernode. - * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) - * xsup 0 1 2 4 7 12 - * Note: dfs will be performed on supernode rep. relative to the new - * row pivoting ordering - * - * (xlsub,lsub): lsub[*] contains the compressed subscript of - * rectangular supernodes; xlsub[j] points to the starting - * location of the j-th column in lsub[*]. Note that xlsub - * is indexed by column. - * Storage: original row subscripts - * - * During the course of sparse LU factorization, we also use - * (xlsub,lsub) for the purpose of symmetric pruning. For each - * supernode {s,s+1,...,t=s+r} with first column s and last - * column t, the subscript set - * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 - * is the structure of column s (i.e. structure of this supernode). - * It is used for the storage of numerical values. - * Furthermore, - * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 - * is the structure of the last column t of this supernode. - * It is for the purpose of symmetric pruning. Therefore, the - * structural subscripts can be rearranged without making physical - * interchanges among the numerical values. - * - * However, if the supernode has only one column, then we - * only keep one set of subscripts. For any subscript interchange - * performed, similar interchange must be done on the numerical - * values. - * - * The last column structures (for pruning) will be removed - * after the numercial LU factorization phase. - * - * (xlusup,lusup): lusup[*] contains the numerical values of the - * rectangular supernodes; xlusup[j] points to the starting - * location of the j-th column in storage vector lusup[*] - * Note: xlusup is indexed by column. 
- * Each rectangular supernode is stored by column-major - * scheme, consistent with Fortran 2-dim array storage. - * - * (xusub,ucol,usub): ucol[*] stores the numerical values of - * U-columns outside the rectangular supernodes. The row - * subscript of nonzero ucol[k] is stored in usub[k]. - * xusub[i] points to the starting location of column i in ucol. - * Storage: new row subscripts; that is subscripts of PA. - */ - -#ifndef EIGEN_LU_STRUCTS -#define EIGEN_LU_STRUCTS -namespace Eigen { -namespace internal { - -typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; - -template -struct LU_GlobalLU_t { - typedef typename IndexVector::Scalar Index; - IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode - IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) - ScalarVector lusup; // nonzero values of L ordered by columns - IndexVector lsub; // Compressed row indices of L rectangular supernodes. - IndexVector xlusup; // pointers to the beginning of each column in lusup - IndexVector xlsub; // pointers to the beginning of each column in lsub - Index nzlmax; // Current max size of lsub - Index nzlumax; // Current max size of lusup - ScalarVector ucol; // nonzero values of U ordered by columns - IndexVector usub; // row indices of U columns in ucol - IndexVector xusub; // Pointers to the beginning of each column of U in ucol - Index nzumax; // Current max size of ucol - Index n; // Number of columns in the matrix - Index num_expansions; -}; - -// Values to set for performance -template -struct perfvalues { - Index panel_size; // a panel consists of at most consecutive columns - Index relax; // To control degree of relaxing supernodes. 
If the number of nodes (columns) - // in a subtree of the elimination tree is less than relax, this subtree is considered - // as one supernode regardless of the row structures of those columns - Index maxsuper; // The maximum size for a supernode in complete LU - Index rowblk; // The minimum row dimension for 2-D blocking to be used; - Index colblk; // The minimum column dimension for 2-D blocking to be used; - Index fillfactor; // The estimated fills factors for L and U, compared with A -}; - -} // end namespace internal - -} // end namespace Eigen -#endif // EIGEN_LU_STRUCTS diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h index 54a569408..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h @@ -1,298 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H -#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H - -namespace Eigen { -namespace internal { - -/** \ingroup SparseLU_Module - * \brief a class to manipulate the L supernodal factor from the SparseLU factorization - * - * This class contain the data to easily store - * and manipulate the supernodes during the factorization and solution phase of Sparse LU. - * Only the lower triangular matrix has supernodes. 
- * - * NOTE : This class corresponds to the SCformat structure in SuperLU - * - */ -/* TODO - * InnerIterator as for sparsematrix - * SuperInnerIterator to iterate through all supernodes - * Function for triangular solve - */ -template -class MappedSuperNodalMatrix -{ - public: - typedef _Scalar Scalar; - typedef _Index Index; - typedef Matrix IndexVector; - typedef Matrix ScalarVector; - public: - MappedSuperNodalMatrix() - { - - } - MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, - IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) - { - setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); - } - - ~MappedSuperNodalMatrix() - { - - } - /** - * Set appropriate pointers for the lower triangular supernodal matrix - * These infos are available at the end of the numerical factorization - * FIXME This class will be modified such that it can be use in the course - * of the factorization. 
- */ - void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, - IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) - { - m_row = m; - m_col = n; - m_nzval = nzval.data(); - m_nzval_colptr = nzval_colptr.data(); - m_rowind = rowind.data(); - m_rowind_colptr = rowind_colptr.data(); - m_nsuper = col_to_sup(n); - m_col_to_sup = col_to_sup.data(); - m_sup_to_col = sup_to_col.data(); - } - - /** - * Number of rows - */ - Index rows() { return m_row; } - - /** - * Number of columns - */ - Index cols() { return m_col; } - - /** - * Return the array of nonzero values packed by column - * - * The size is nnz - */ - Scalar* valuePtr() { return m_nzval; } - - const Scalar* valuePtr() const - { - return m_nzval; - } - /** - * Return the pointers to the beginning of each column in \ref valuePtr() - */ - Index* colIndexPtr() - { - return m_nzval_colptr; - } - - const Index* colIndexPtr() const - { - return m_nzval_colptr; - } - - /** - * Return the array of compressed row indices of all supernodes - */ - Index* rowIndex() { return m_rowind; } - - const Index* rowIndex() const - { - return m_rowind; - } - - /** - * Return the location in \em rowvaluePtr() which starts each column - */ - Index* rowIndexPtr() { return m_rowind_colptr; } - - const Index* rowIndexPtr() const - { - return m_rowind_colptr; - } - - /** - * Return the array of column-to-supernode mapping - */ - Index* colToSup() { return m_col_to_sup; } - - const Index* colToSup() const - { - return m_col_to_sup; - } - /** - * Return the array of supernode-to-column mapping - */ - Index* supToCol() { return m_sup_to_col; } - - const Index* supToCol() const - { - return m_sup_to_col; - } - - /** - * Return the number of supernodes - */ - Index nsuper() const - { - return m_nsuper; - } - - class InnerIterator; - template - void solveInPlace( MatrixBase&X) const; - - - - - protected: - Index m_row; // Number of rows - Index m_col; // Number of columns 
- Index m_nsuper; // Number of supernodes - Scalar* m_nzval; //array of nonzero values packed by column - Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j - Index* m_rowind; // Array of compressed row indices of rectangular supernodes - Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j - Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs - Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode - - private : -}; - -/** - * \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L - * - */ -template -class MappedSuperNodalMatrix::InnerIterator -{ - public: - InnerIterator(const MappedSuperNodalMatrix& mat, Index outer) - : m_matrix(mat), - m_outer(outer), - m_supno(mat.colToSup()[outer]), - m_idval(mat.colIndexPtr()[outer]), - m_startidval(m_idval), - m_endidval(mat.colIndexPtr()[outer+1]), - m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]), - m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1]) - {} - inline InnerIterator& operator++() - { - m_idval++; - m_idrow++; - return *this; - } - inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } - - inline Scalar& valueRef() { return const_cast(m_matrix.valuePtr()[m_idval]); } - - inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } - inline Index row() const { return index(); } - inline Index col() const { return m_outer; } - - inline Index supIndex() const { return m_supno; } - - inline operator bool() const - { - return ( (m_idval < m_endidval) && (m_idval >= m_startidval) - && (m_idrow < m_endidrow) ); - } - - protected: - const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix - const Index m_outer; // Current column - const Index m_supno; // Current SuperNode number - Index m_idval; // Index to browse the values in the 
current column - const Index m_startidval; // Start of the column value - const Index m_endidval; // End of the column value - Index m_idrow; // Index to browse the row indices - Index m_endidrow; // End index of row indices of the current column -}; - -/** - * \brief Solve with the supernode triangular matrix - * - */ -template -template -void MappedSuperNodalMatrix::solveInPlace( MatrixBase&X) const -{ - Index n = X.rows(); - Index nrhs = X.cols(); - const Scalar * Lval = valuePtr(); // Nonzero values - Matrix work(n, nrhs); // working vector - work.setZero(); - for (Index k = 0; k <= nsuper(); k ++) - { - Index fsupc = supToCol()[k]; // First column of the current supernode - Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column - Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode - Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode - Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode - Index irow; //Current index row - - if (nsupc == 1 ) - { - for (Index j = 0; j < nrhs; j++) - { - InnerIterator it(*this, fsupc); - ++it; // Skip the diagonal element - for (; it; ++it) - { - irow = it.row(); - X(irow, j) -= X(fsupc, j) * it.value(); - } - } - } - else - { - // The supernode has more than one column - Index luptr = colIndexPtr()[fsupc]; - Index lda = colIndexPtr()[fsupc+1] - luptr; - - // Triangular solve - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); - U = A.template triangularView().solve(U); - - // Matrix-vector product - new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); - work.block(0, 0, nrow, nrhs) = A * U; - - //Begin Scatter - for (Index j = 0; j < nrhs; j++) - { - Index iptr = istart + nsupc; - for (Index i = 0; i < nrow; i++) - { - irow = 
rowIndex()[iptr]; - X(irow, j) -= work(i, j); // Scatter operation - work(i, j) = Scalar(0); - iptr++; - } - } - } - } -} - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SPARSELU_MATRIX_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Utils.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Utils.h index 15352ac33..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_Utils.h @@ -1,80 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_SPARSELU_UTILS_H -#define EIGEN_SPARSELU_UTILS_H - -namespace Eigen { -namespace internal { - -/** - * \brief Count Nonzero elements in the factors - */ -template -void SparseLUImpl::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu) -{ - nnzL = 0; - nnzU = (glu.xusub)(n); - Index nsuper = (glu.supno)(n); - Index jlen; - Index i, j, fsupc; - if (n <= 0 ) return; - // For each supernode - for (i = 0; i <= nsuper; i++) - { - fsupc = glu.xsup(i); - jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); - - for (j = fsupc; j < glu.xsup(i+1); j++) - { - nnzL += jlen; - nnzU += j - fsupc + 1; - jlen--; - } - } -} - -/** - * \brief Fix up the data storage lsub for L-subscripts. 
- * - * It removes the subscripts sets for structural pruning, - * and applies permutation to the remaining subscripts - * - */ -template -void SparseLUImpl::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu) -{ - Index fsupc, i, j, k, jstart; - - Index nextl = 0; - Index nsuper = (glu.supno)(n); - - // For each supernode - for (i = 0; i <= nsuper; i++) - { - fsupc = glu.xsup(i); - jstart = glu.xlsub(fsupc); - glu.xlsub(fsupc) = nextl; - for (j = jstart; j < glu.xlsub(fsupc + 1); j++) - { - glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A - nextl++; - } - for (k = fsupc+1; k < glu.xsup(i+1); k++) - glu.xlsub(k) = nextl; // other columns in supernode i - } - - glu.xlsub(n) = nextl; -} - -} // end namespace internal - -} // end namespace Eigen -#endif // EIGEN_SPARSELU_UTILS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_bmod.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_bmod.h index cacc7e987..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -1,180 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU - - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 
- * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_COLUMN_BMOD_H -#define SPARSELU_COLUMN_BMOD_H - -namespace Eigen { - -namespace internal { -/** - * \brief Performs numeric block updates (sup-col) in topological order - * - * \param jcol current column to update - * \param nseg Number of segments in the U part - * \param dense Store the full representation of the column - * \param tempv working array - * \param segrep segment representative ... - * \param repfnz ??? First nonzero column in each row ??? ... - * \param fpanelc First column in the current panel - * \param glu Global LU data. - * \return 0 - successful return - * > 0 - number of bytes allocated when run out of space - * - */ -template -Index SparseLUImpl::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu) -{ - Index jsupno, k, ksub, krep, ksupno; - Index lptr, nrow, isub, irow, nextlu, new_next, ufirst; - Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; - /* krep = representative of current k-th supernode - * fsupc = first supernodal column - * nsupc = number of columns in a supernode - * nsupr = number of rows in a supernode - * luptr = location of supernodal LU-block in storage - * kfnz = first nonz in the k-th supernodal segment - * no_zeros = no lf leading zeros in a supernodal U-segment - */ - - jsupno = glu.supno(jcol); - // For each nonzero supernode segment of U[*,j] in topological order - k = nseg - 1; - Index d_fsupc; // distance between the first column of the current panel and the - // first column of the current snode - Index fst_col; // First 
column within small LU update - Index segsize; - for (ksub = 0; ksub < nseg; ksub++) - { - krep = segrep(k); k--; - ksupno = glu.supno(krep); - if (jsupno != ksupno ) - { - // outside the rectangular supernode - fsupc = glu.xsup(ksupno); - fst_col = (std::max)(fsupc, fpanelc); - - // Distance from the current supernode to the current panel; - // d_fsupc = 0 if fsupc > fpanelc - d_fsupc = fst_col - fsupc; - - luptr = glu.xlusup(fst_col) + d_fsupc; - lptr = glu.xlsub(fsupc) + d_fsupc; - - kfnz = repfnz(krep); - kfnz = (std::max)(kfnz, fpanelc); - - segsize = krep - kfnz + 1; - nsupc = krep - fst_col + 1; - nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); - nrow = nsupr - d_fsupc - nsupc; - Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col); - - - // Perform a triangular solver and block update, - // then scatter the result of sup-col update to dense - no_zeros = kfnz - fst_col; - if(segsize==1) - LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); - else - LU_kernel_bmod::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); - } // end if jsupno - } // end for each segment - - // Process the supernodal portion of L\U[*,j] - nextlu = glu.xlusup(jcol); - fsupc = glu.xsup(jsupno); - - // copy the SPA dense into L\U[*,j] - Index mem; - new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); - Index offset = internal::first_multiple(new_next, internal::packet_traits::size) - new_next; - if(offset) - new_next += offset; - while (new_next > glu.nzlumax ) - { - mem = memXpand(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); - if (mem) return mem; - } - - for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) - { - irow = glu.lsub(isub); - glu.lusup(nextlu) = dense(irow); - dense(irow) = Scalar(0.0); - ++nextlu; - } - - if(offset) - { - glu.lusup.segment(nextlu,offset).setZero(); - nextlu += offset; - } - glu.xlusup(jcol + 1) = nextlu; // close L\U(*,jcol); - - /* For 
more updates within the panel (also within the current supernode), - * should start from the first column of the panel, or the first column - * of the supernode, whichever is bigger. There are two cases: - * 1) fsupc < fpanelc, then fst_col <-- fpanelc - * 2) fsupc >= fpanelc, then fst_col <-- fsupc - */ - fst_col = (std::max)(fsupc, fpanelc); - - if (fst_col < jcol) - { - // Distance between the current supernode and the current panel - // d_fsupc = 0 if fsupc >= fpanelc - d_fsupc = fst_col - fsupc; - - lptr = glu.xlsub(fsupc) + d_fsupc; - luptr = glu.xlusup(fst_col) + d_fsupc; - nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension - nsupc = jcol - fst_col; // excluding jcol - nrow = nsupr - d_fsupc - nsupc; - - // points to the beginning of jcol in snode L\U(jsupno) - ufirst = glu.xlusup(jcol) + d_fsupc; - Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol); - MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); - VectorBlock u(glu.lusup, ufirst, nsupc); - u = A.template triangularView().solve(u); - - new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); - VectorBlock l(glu.lusup, ufirst+nsupc, nrow); - l.noalias() -= A * u; - - } // End if fst_col - return 0; -} - -} // end namespace internal -} // end namespace Eigen - -#endif // SPARSELU_COLUMN_BMOD_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_dfs.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_dfs.h index 4c04b0e44..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -1,177 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU - - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_COLUMN_DFS_H -#define SPARSELU_COLUMN_DFS_H - -template class SparseLUImpl; -namespace Eigen { - -namespace internal { - -template -struct column_dfs_traits : no_assignment_operator -{ - typedef typename ScalarVector::Scalar Scalar; - typedef typename IndexVector::Scalar Index; - column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl::GlobalLU_t& glu, SparseLUImpl& luImpl) - : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl) - {} - bool update_segrep(Index /*krep*/, Index /*jj*/) - { - return true; - } - void mem_expand(IndexVector& lsub, Index& nextl, Index chmark) - { - if (nextl >= m_glu.nzlmax) - m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); - if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU; - } - enum { ExpandMem = true }; - - Index m_jcol; - Index& m_jsuper_ref; - typename SparseLUImpl::GlobalLU_t& m_glu; - SparseLUImpl& m_luImpl; -}; - - -/** - * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary - * - * A supernode representative is 
the last column of a supernode. - * The nonzeros in U[*,j] are segments that end at supernodes representatives. - * The routine returns a list of the supernodal representatives - * in topological order of the dfs that generates them. - * The location of the first nonzero in each supernodal segment - * (supernodal entry location) is also returned. - * - * \param m number of rows in the matrix - * \param jcol Current column - * \param perm_r Row permutation - * \param maxsuper Maximum number of column allowed in a supernode - * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended - * \param lsub_col defines the rhs vector to start the dfs - * \param [in,out] segrep Segment representatives - new segments appended - * \param repfnz First nonzero location in each row - * \param xprune - * \param marker marker[i] == jj, if i was visited during dfs of current column jj; - * \param parent - * \param xplore working array - * \param glu global LU data - * \return 0 success - * > 0 number of bytes allocated when run out of space - * - */ -template -Index SparseLUImpl::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) -{ - - Index jsuper = glu.supno(jcol); - Index nextl = glu.xlsub(jcol); - VectorBlock marker2(marker, 2*m, m); - - - column_dfs_traits traits(jcol, jsuper, glu, *this); - - // For each nonzero in A(*,jcol) do dfs - for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++) - { - Index krow = lsub_col(k); - lsub_col(k) = emptyIdxLU; - Index kmark = marker2(krow); - - // krow was visited before, go to the next nonz; - if (kmark == jcol) continue; - - dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent, - xplore, glu, nextl, krow, traits); - } // for each nonzero ... 
- - Index fsupc, jptr, jm1ptr, ito, ifrom, istop; - Index nsuper = glu.supno(jcol); - Index jcolp1 = jcol + 1; - Index jcolm1 = jcol - 1; - - // check to see if j belongs in the same supernode as j-1 - if ( jcol == 0 ) - { // Do nothing for column 0 - nsuper = glu.supno(0) = 0 ; - } - else - { - fsupc = glu.xsup(nsuper); - jptr = glu.xlsub(jcol); // Not yet compressed - jm1ptr = glu.xlsub(jcolm1); - - // Use supernodes of type T2 : see SuperLU paper - if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU; - - // Make sure the number of columns in a supernode doesn't - // exceed threshold - if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU; - - /* If jcol starts a new supernode, reclaim storage space in - * glu.lsub from previous supernode. Note we only store - * the subscript set of the first and last columns of - * a supernode. (first for num values, last for pruning) - */ - if (jsuper == emptyIdxLU) - { // starts a new supernode - if ( (fsupc < jcolm1-1) ) - { // >= 3 columns in nsuper - ito = glu.xlsub(fsupc+1); - glu.xlsub(jcolm1) = ito; - istop = ito + jptr - jm1ptr; - xprune(jcolm1) = istop; // intialize xprune(jcol-1) - glu.xlsub(jcol) = istop; - - for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) - glu.lsub(ito) = glu.lsub(ifrom); - nextl = ito; // = istop + length(jcol) - } - nsuper++; - glu.supno(jcol) = nsuper; - } // if a new supernode - } // end else: jcol > 0 - - // Tidy up the pointers before exit - glu.xsup(nsuper+1) = jcolp1; - glu.supno(jcolp1) = nsuper; - xprune(jcol) = nextl; // Intialize upper bound for pruning - glu.xlsub(jcolp1) = nextl; - - return 0; -} - -} // end namespace internal - -} // end namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 170610d9f..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ 
b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -1,106 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -/* - - * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU - - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_COPY_TO_UCOL_H -#define SPARSELU_COPY_TO_UCOL_H - -namespace Eigen { -namespace internal { - -/** - * \brief Performs numeric block updates (sup-col) in topological order - * - * \param jcol current column to update - * \param nseg Number of segments in the U part - * \param segrep segment representative ... - * \param repfnz First nonzero column in each row ... - * \param perm_r Row permutation - * \param dense Store the full representation of the column - * \param glu Global LU data. 
- * \return 0 - successful return - * > 0 - number of bytes allocated when run out of space - * - */ -template -Index SparseLUImpl::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu) -{ - Index ksub, krep, ksupno; - - Index jsupno = glu.supno(jcol); - - // For each nonzero supernode segment of U[*,j] in topological order - Index k = nseg - 1, i; - Index nextu = glu.xusub(jcol); - Index kfnz, isub, segsize; - Index new_next,irow; - Index fsupc, mem; - for (ksub = 0; ksub < nseg; ksub++) - { - krep = segrep(k); k--; - ksupno = glu.supno(krep); - if (jsupno != ksupno ) // should go into ucol(); - { - kfnz = repfnz(krep); - if (kfnz != emptyIdxLU) - { // Nonzero U-segment - fsupc = glu.xsup(ksupno); - isub = glu.xlsub(fsupc) + kfnz - fsupc; - segsize = krep - kfnz + 1; - new_next = nextu + segsize; - while (new_next > glu.nzumax) - { - mem = memXpand(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions); - if (mem) return mem; - mem = memXpand(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions); - if (mem) return mem; - - } - - for (i = 0; i < segsize; i++) - { - irow = glu.lsub(isub); - glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order - glu.ucol(nextu) = dense(irow); - dense(irow) = Scalar(0.0); - nextu++; - isub++; - } - - } // end nonzero U-segment - - } // end if jsupno - - } // end for each segment - glu.xusub(jcol + 1) = nextu; // close U(*,jcol) - return 0; -} - -} // namespace internal -} // end namespace Eigen - -#endif // SPARSELU_COPY_TO_UCOL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_gemm_kernel.h index 9e4e3e72b..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_gemm_kernel.h @@ -1,279 +0,0 @@ -// This file is part of 
Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H -#define EIGEN_SPARSELU_GEMM_KERNEL_H - -namespace Eigen { - -namespace internal { - - -/** \internal - * A general matrix-matrix product kernel optimized for the SparseLU factorization. - * - A, B, and C must be column major - * - lda and ldc must be multiples of the respective packet size - * - C must have the same alignment as A - */ -template -EIGEN_DONT_INLINE -void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc) -{ - using namespace Eigen::internal; - - typedef typename packet_traits::type Packet; - enum { - NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, - PacketSize = packet_traits::size, - PM = 8, // peeling in M - RN = 2, // register blocking - RK = NumberOfRegisters>=16 ? 
4 : 2, // register blocking - BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk - SM = PM*PacketSize // step along M - }; - Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking - Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once - Index i0 = internal::first_aligned(A,m); - - eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_aligned(C,m))); - - // handle the non aligned rows of A and C without any optimization: - for(Index i=0; i(BM, m-ib); // actual number of rows - Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling - Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization - - // Let's process two columns of B-C at once - for(Index j=0; j(Bc0[0]); - b10 = pset1(Bc0[1]); - if(RK==4) b20 = pset1(Bc0[2]); - if(RK==4) b30 = pset1(Bc0[3]); - b01 = pset1(Bc1[0]); - b11 = pset1(Bc1[1]); - if(RK==4) b21 = pset1(Bc1[2]); - if(RK==4) b31 = pset1(Bc1[3]); - - Packet a0, a1, a2, a3, c0, c1, t0, t1; - - const Scalar* A0 = A+ib+(k+0)*lda; - const Scalar* A1 = A+ib+(k+1)*lda; - const Scalar* A2 = A+ib+(k+2)*lda; - const Scalar* A3 = A+ib+(k+3)*lda; - - Scalar* C0 = C+ib+(j+0)*ldc; - Scalar* C1 = C+ib+(j+1)*ldc; - - a0 = pload(A0); - a1 = pload(A1); - if(RK==4) - { - a2 = pload(A2); - a3 = pload(A3); - } - else - { - // workaround "may be used uninitialized in this function" warning - a2 = a3 = a0; - } - -#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);} -#define WORK(I) \ - c0 = pload(C0+i+(I)*PacketSize); \ - c1 = pload(C1+i+(I)*PacketSize); \ - KMADD(c0, a0, b00, t0) \ - KMADD(c1, a0, b01, t1) \ - a0 = pload(A0+i+(I+1)*PacketSize); \ - KMADD(c0, a1, b10, t0) \ - KMADD(c1, a1, b11, t1) \ - a1 = pload(A1+i+(I+1)*PacketSize); \ - if(RK==4) KMADD(c0, a2, b20, t0) \ - if(RK==4) KMADD(c1, a2, b21, t1) \ - if(RK==4) a2 = 
pload(A2+i+(I+1)*PacketSize); \ - if(RK==4) KMADD(c0, a3, b30, t0) \ - if(RK==4) KMADD(c1, a3, b31, t1) \ - if(RK==4) a3 = pload(A3+i+(I+1)*PacketSize); \ - pstore(C0+i+(I)*PacketSize, c0); \ - pstore(C1+i+(I)*PacketSize, c1) - - // process rows of A' - C' with aggressive vectorization and peeling - for(Index i=0; i0) - { - const Scalar* Bc0 = B+(n-1)*ldb; - - for(Index k=0; k(Bc0[0]); - b10 = pset1(Bc0[1]); - if(RK==4) b20 = pset1(Bc0[2]); - if(RK==4) b30 = pset1(Bc0[3]); - - Packet a0, a1, a2, a3, c0, t0/*, t1*/; - - const Scalar* A0 = A+ib+(k+0)*lda; - const Scalar* A1 = A+ib+(k+1)*lda; - const Scalar* A2 = A+ib+(k+2)*lda; - const Scalar* A3 = A+ib+(k+3)*lda; - - Scalar* C0 = C+ib+(n_end)*ldc; - - a0 = pload(A0); - a1 = pload(A1); - if(RK==4) - { - a2 = pload(A2); - a3 = pload(A3); - } - else - { - // workaround "may be used uninitialized in this function" warning - a2 = a3 = a0; - } - -#define WORK(I) \ - c0 = pload(C0+i+(I)*PacketSize); \ - KMADD(c0, a0, b00, t0) \ - a0 = pload(A0+i+(I+1)*PacketSize); \ - KMADD(c0, a1, b10, t0) \ - a1 = pload(A1+i+(I+1)*PacketSize); \ - if(RK==4) KMADD(c0, a2, b20, t0) \ - if(RK==4) a2 = pload(A2+i+(I+1)*PacketSize); \ - if(RK==4) KMADD(c0, a3, b30, t0) \ - if(RK==4) a3 = pload(A3+i+(I+1)*PacketSize); \ - pstore(C0+i+(I)*PacketSize, c0); - - // agressive vectorization and peeling - for(Index i=0; i0) - { - for(Index j=0; j1 ? 
Aligned : 0 - }; - typedef Map, Alignment > MapVector; - typedef Map, Alignment > ConstMapVector; - if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b); - - else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b) - + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b); - - else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b) - + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b) - + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b); - } - } - - } // blocking on the rows of A and C -} -#undef KMADD - -} // namespace internal - -} // namespace Eigen - -#endif // EIGEN_SPARSELU_GEMM_KERNEL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 7a4e4305a..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -1,127 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* This file is a modified version of heap_relax_snode.c file in SuperLU - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 
- * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ - -#ifndef SPARSELU_HEAP_RELAX_SNODE_H -#define SPARSELU_HEAP_RELAX_SNODE_H - -namespace Eigen { -namespace internal { - -/** - * \brief Identify the initial relaxed supernodes - * - * This routine applied to a symmetric elimination tree. - * It assumes that the matrix has been reordered according to the postorder of the etree - * \param n The number of columns - * \param et elimination tree - * \param relax_columns Maximum number of columns allowed in a relaxed snode - * \param descendants Number of descendants of each node in the etree - * \param relax_end last column in a supernode - */ -template -void SparseLUImpl::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end) -{ - - // The etree may not be postordered, but its heap ordered - IndexVector post; - internal::treePostorder(n, et, post); // Post order etree - IndexVector inv_post(n+1); - Index i; - for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()??? 
- - // Renumber etree in postorder - IndexVector iwork(n); - IndexVector et_save(n+1); - for (i = 0; i < n; ++i) - { - iwork(post(i)) = post(et(i)); - } - et_save = et; // Save the original etree - et = iwork; - - // compute the number of descendants of each node in the etree - relax_end.setConstant(emptyIdxLU); - Index j, parent; - descendants.setZero(); - for (j = 0; j < n; j++) - { - parent = et(j); - if (parent != n) // not the dummy root - descendants(parent) += descendants(j) + 1; - } - // Identify the relaxed supernodes by postorder traversal of the etree - Index snode_start; // beginning of a snode - Index k; - Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree - Index nsuper_et = 0; // Number of relaxed snodes in the original etree - Index l; - for (j = 0; j < n; ) - { - parent = et(j); - snode_start = j; - while ( parent != n && descendants(parent) < relax_columns ) - { - j = parent; - parent = et(j); - } - // Found a supernode in postordered etree, j is the last column - ++nsuper_et_post; - k = n; - for (i = snode_start; i <= j; ++i) - k = (std::min)(k, inv_post(i)); - l = inv_post(j); - if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode - { - // This is also a supernode in the original etree - relax_end(k) = l; // Record last column - ++nsuper_et; - } - else - { - for (i = snode_start; i <= j; ++i) - { - l = inv_post(i); - if (descendants(i) == 0) - { - relax_end(l) = l; - ++nsuper_et; - } - } - } - j++; - // Search for a new leaf - while (descendants(j) != 0 && j < n) j++; - } // End postorder traversal of the etree - - // Recover the original etree - et = et_save; -} - -} // end namespace internal - -} // end namespace Eigen -#endif // SPARSELU_HEAP_RELAX_SNODE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index 6af026754..e69de29bb 100644 --- 
a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -1,130 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef SPARSELU_KERNEL_BMOD_H -#define SPARSELU_KERNEL_BMOD_H - -namespace Eigen { -namespace internal { - -/** - * \brief Performs numeric block updates from a given supernode to a single column - * - * \param segsize Size of the segment (and blocks ) to use for updates - * \param[in,out] dense Packed values of the original matrix - * \param tempv temporary vector to use for updates - * \param lusup array containing the supernodes - * \param lda Leading dimension in the supernode - * \param nrow Number of rows in the rectangular part of the supernode - * \param lsub compressed row subscripts of supernodes - * \param lptr pointer to the first column of the current supernode in lsub - * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode - * \return 0 on success - */ -template struct LU_kernel_bmod -{ - template - static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, - const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros); -}; - -template -template -EIGEN_DONT_INLINE void LU_kernel_bmod::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, - const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros) -{ - typedef typename ScalarVector::Scalar Scalar; - // First, copy U[*,j] segment from 
dense(*) to tempv(*) - // The result of triangular solve is in tempv[*]; - // The result of matric-vector update is in dense[*] - Index isub = lptr + no_zeros; - int i; - Index irow; - for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) - { - irow = lsub(isub); - tempv(i) = dense(irow); - ++isub; - } - // Dense triangular solve -- start effective triangle - luptr += lda * no_zeros + no_zeros; - // Form Eigen matrix and vector - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) ); - Map > u(tempv.data(), segsize); - - u = A.template triangularView().solve(u); - - // Dense matrix-vector product y <-- B*x - luptr += segsize; - const Index PacketSize = internal::packet_traits::size; - Index ldl = internal::first_multiple(nrow, PacketSize); - Map, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) ); - Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize); - Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize; - Map, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) ); - - l.setZero(); - internal::sparselu_gemm(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride()); - - // Scatter tempv[] into SPA dense[] as a temporary storage - isub = lptr + no_zeros; - for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) - { - irow = lsub(isub++); - dense(irow) = tempv(i); - } - - // Scatter l into SPA dense[] - for (i = 0; i < nrow; i++) - { - irow = lsub(isub++); - dense(irow) -= l(i); - } -} - -template <> struct LU_kernel_bmod<1> -{ - template - static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, - const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const 
Index no_zeros); -}; - - -template -EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, - const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros) -{ - typedef typename ScalarVector::Scalar Scalar; - Scalar f = dense(lsub(lptr + no_zeros)); - luptr += lda * no_zeros + no_zeros + 1; - const Scalar* a(lusup.data() + luptr); - const /*typename IndexVector::Scalar*/Index* irow(lsub.data()+lptr + no_zeros + 1); - Index i = 0; - for (; i+1 < nrow; i+=2) - { - Index i0 = *(irow++); - Index i1 = *(irow++); - Scalar a0 = *(a++); - Scalar a1 = *(a++); - Scalar d0 = dense.coeff(i0); - Scalar d1 = dense.coeff(i1); - d0 -= f*a0; - d1 -= f*a1; - dense.coeffRef(i0) = d0; - dense.coeffRef(i1) = d1; - } - if(i -// Copyright (C) 2012 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU - - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. 
- */ -#ifndef SPARSELU_PANEL_BMOD_H -#define SPARSELU_PANEL_BMOD_H - -namespace Eigen { -namespace internal { - -/** - * \brief Performs numeric block updates (sup-panel) in topological order. - * - * Before entering this routine, the original nonzeros in the panel - * were already copied i nto the spa[m,w] - * - * \param m number of rows in the matrix - * \param w Panel size - * \param jcol Starting column of the panel - * \param nseg Number of segments in the U part - * \param dense Store the full representation of the panel - * \param tempv working array - * \param segrep segment representative... first row in the segment - * \param repfnz First nonzero rows - * \param glu Global LU data. - * - * - */ -template -void SparseLUImpl::panel_bmod(const Index m, const Index w, const Index jcol, - const Index nseg, ScalarVector& dense, ScalarVector& tempv, - IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu) -{ - - Index ksub,jj,nextl_col; - Index fsupc, nsupc, nsupr, nrow; - Index krep, kfnz; - Index lptr; // points to the row subscripts of a supernode - Index luptr; // ... 
- Index segsize,no_zeros ; - // For each nonz supernode segment of U[*,j] in topological order - Index k = nseg - 1; - const Index PacketSize = internal::packet_traits::size; - - for (ksub = 0; ksub < nseg; ksub++) - { // For each updating supernode - /* krep = representative of current k-th supernode - * fsupc = first supernodal column - * nsupc = number of columns in a supernode - * nsupr = number of rows in a supernode - */ - krep = segrep(k); k--; - fsupc = glu.xsup(glu.supno(krep)); - nsupc = krep - fsupc + 1; - nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); - nrow = nsupr - nsupc; - lptr = glu.xlsub(fsupc); - - // loop over the panel columns to detect the actual number of columns and rows - Index u_rows = 0; - Index u_cols = 0; - for (jj = jcol; jj < jcol + w; jj++) - { - nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row - - kfnz = repfnz_col(krep); - if ( kfnz == emptyIdxLU ) - continue; // skip any zero segment - - segsize = krep - kfnz + 1; - u_cols++; - u_rows = (std::max)(segsize,u_rows); - } - - if(nsupc >= 2) - { - Index ldu = internal::first_multiple(u_rows, PacketSize); - Map > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu)); - - // gather U - Index u_col = 0; - for (jj = jcol; jj < jcol + w; jj++) - { - nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row - VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here - - kfnz = repfnz_col(krep); - if ( kfnz == emptyIdxLU ) - continue; // skip any zero segment - - segsize = krep - kfnz + 1; - luptr = glu.xlusup(fsupc); - no_zeros = kfnz - fsupc; - - Index isub = lptr + no_zeros; - Index off = u_rows-segsize; - for (Index i = 0; i < off; i++) U(i,u_col) = 0; - for (Index i = 0; i < segsize; i++) - { - Index irow = glu.lsub(isub); - U(i+off,u_col) = dense_col(irow); - ++isub; - } - u_col++; - } - // solve U = A^-1 U - luptr = 
glu.xlusup(fsupc); - Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); - no_zeros = (krep - u_rows + 1) - fsupc; - luptr += lda * no_zeros + no_zeros; - MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) ); - U = A.template triangularView().solve(U); - - // update - luptr += u_rows; - MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) ); - eigen_assert(tempv.size()>w*ldu + nrow*w + 1); - - Index ldl = internal::first_multiple(nrow, PacketSize); - Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize; - MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl)); - - L.setZero(); - internal::sparselu_gemm(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride()); - - // scatter U and L - u_col = 0; - for (jj = jcol; jj < jcol + w; jj++) - { - nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row - VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here - - kfnz = repfnz_col(krep); - if ( kfnz == emptyIdxLU ) - continue; // skip any zero segment - - segsize = krep - kfnz + 1; - no_zeros = kfnz - fsupc; - Index isub = lptr + no_zeros; - - Index off = u_rows-segsize; - for (Index i = 0; i < segsize; i++) - { - Index irow = glu.lsub(isub++); - dense_col(irow) = U.coeff(i+off,u_col); - U.coeffRef(i+off,u_col) = 0; - } - - // Scatter l into SPA dense[] - for (Index i = 0; i < nrow; i++) - { - Index irow = glu.lsub(isub++); - dense_col(irow) -= L.coeff(i,u_col); - L.coeffRef(i,u_col) = 0; - } - u_col++; - } - } - else // level 2 only - { - // Sequence through each column in the panel - for (jj = jcol; jj < jcol + w; jj++) - { - nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row - VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather 
entire matrix column from/to here - - kfnz = repfnz_col(krep); - if ( kfnz == emptyIdxLU ) - continue; // skip any zero segment - - segsize = krep - kfnz + 1; - luptr = glu.xlusup(fsupc); - - Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr - - // Perform a trianglar solve and block update, - // then scatter the result of sup-col update to dense[] - no_zeros = kfnz - fsupc; - if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); - else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); - else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); - else LU_kernel_bmod::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); - } // End for each column in the panel - } - - } // End for each updating supernode -} // end panel bmod - -} // end namespace internal - -} // end namespace Eigen - -#endif // SPARSELU_PANEL_BMOD_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_panel_dfs.h index dc0054efd..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -1,258 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU - - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. 
- * November 15, 1997 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_PANEL_DFS_H -#define SPARSELU_PANEL_DFS_H - -namespace Eigen { - -namespace internal { - -template -struct panel_dfs_traits -{ - typedef typename IndexVector::Scalar Index; - panel_dfs_traits(Index jcol, Index* marker) - : m_jcol(jcol), m_marker(marker) - {} - bool update_segrep(Index krep, Index jj) - { - if(m_marker[krep] -template -void SparseLUImpl::dfs_kernel(const Index jj, IndexVector& perm_r, - Index& nseg, IndexVector& panel_lsub, IndexVector& segrep, - Ref repfnz_col, IndexVector& xprune, Ref marker, IndexVector& parent, - IndexVector& xplore, GlobalLU_t& glu, - Index& nextl_col, Index krow, Traits& traits - ) -{ - - Index kmark = marker(krow); - - // For each unmarked krow of jj - marker(krow) = jj; - Index kperm = perm_r(krow); - if (kperm == emptyIdxLU ) { - // krow is in L : place it in structure of L(*, jj) - panel_lsub(nextl_col++) = krow; // krow is indexed into A - - traits.mem_expand(panel_lsub, nextl_col, kmark); - } - else - { - // krow is in U : if its supernode-representative krep - // has been explored, update repfnz(*) - // krep = supernode representative of the current row - Index krep = glu.xsup(glu.supno(kperm)+1) - 1; - // First nonzero element in the current column: - Index myfnz = repfnz_col(krep); - - if (myfnz != emptyIdxLU ) - { - // Representative visited before - if (myfnz > kperm ) repfnz_col(krep) = kperm; - - } - else - { - // Otherwise, perform 
dfs starting at krep - Index oldrep = emptyIdxLU; - parent(krep) = oldrep; - repfnz_col(krep) = kperm; - Index xdfs = glu.xlsub(krep); - Index maxdfs = xprune(krep); - - Index kpar; - do - { - // For each unmarked kchild of krep - while (xdfs < maxdfs) - { - Index kchild = glu.lsub(xdfs); - xdfs++; - Index chmark = marker(kchild); - - if (chmark != jj ) - { - marker(kchild) = jj; - Index chperm = perm_r(kchild); - - if (chperm == emptyIdxLU) - { - // case kchild is in L: place it in L(*, j) - panel_lsub(nextl_col++) = kchild; - traits.mem_expand(panel_lsub, nextl_col, chmark); - } - else - { - // case kchild is in U : - // chrep = its supernode-rep. If its rep has been explored, - // update its repfnz(*) - Index chrep = glu.xsup(glu.supno(chperm)+1) - 1; - myfnz = repfnz_col(chrep); - - if (myfnz != emptyIdxLU) - { // Visited before - if (myfnz > chperm) - repfnz_col(chrep) = chperm; - } - else - { // Cont. dfs at snode-rep of kchild - xplore(krep) = xdfs; - oldrep = krep; - krep = chrep; // Go deeper down G(L) - parent(krep) = oldrep; - repfnz_col(krep) = chperm; - xdfs = glu.xlsub(krep); - maxdfs = xprune(krep); - - } // end if myfnz != -1 - } // end if chperm == -1 - - } // end if chmark !=jj - } // end while xdfs < maxdfs - - // krow has no more unexplored nbrs : - // Place snode-rep krep in postorder DFS, if this - // segment is seen for the first time. (Note that - // "repfnz(krep)" may change later.) 
- // Baktrack dfs to its parent - if(traits.update_segrep(krep,jj)) - //if (marker1(krep) < jcol ) - { - segrep(nseg) = krep; - ++nseg; - //marker1(krep) = jj; - } - - kpar = parent(krep); // Pop recursion, mimic recursion - if (kpar == emptyIdxLU) - break; // dfs done - krep = kpar; - xdfs = xplore(krep); - maxdfs = xprune(krep); - - } while (kpar != emptyIdxLU); // Do until empty stack - - } // end if (myfnz = -1) - - } // end if (kperm == -1) -} - -/** - * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w) - * - * A supernode representative is the last column of a supernode. - * The nonzeros in U[*,j] are segments that end at supernodes representatives - * - * The routine returns a list of the supernodal representatives - * in topological order of the dfs that generates them. This list is - * a superset of the topological order of each individual column within - * the panel. - * The location of the first nonzero in each supernodal segment - * (supernodal entry location) is also returned. Each column has - * a separate list for this purpose. 
- * - * Two markers arrays are used for dfs : - * marker[i] == jj, if i was visited during dfs of current column jj; - * marker1[i] >= jcol, if i was visited by earlier columns in this panel; - * - * \param[in] m number of rows in the matrix - * \param[in] w Panel size - * \param[in] jcol Starting column of the panel - * \param[in] A Input matrix in column-major storage - * \param[in] perm_r Row permutation - * \param[out] nseg Number of U segments - * \param[out] dense Accumulate the column vectors of the panel - * \param[out] panel_lsub Subscripts of the row in the panel - * \param[out] segrep Segment representative i.e first nonzero row of each segment - * \param[out] repfnz First nonzero location in each row - * \param[out] xprune The pruned elimination tree - * \param[out] marker work vector - * \param parent The elimination tree - * \param xplore work vector - * \param glu The global data structure - * - */ - -template -void SparseLUImpl::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) -{ - Index nextl_col; // Next available position in panel_lsub[*,jj] - - // Initialize pointers - VectorBlock marker1(marker, m, m); - nseg = 0; - - panel_dfs_traits traits(jcol, marker1.data()); - - // For each column in the panel - for (Index jj = jcol; jj < jcol + w; jj++) - { - nextl_col = (jj - jcol) * m; - - VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row - VectorBlock dense_col(dense,nextl_col, m); // Accumulate a column vector here - - - // For each nnz in A[*, jj] do depth first search - for (typename MatrixType::InnerIterator it(A, jj); it; ++it) - { - Index krow = it.row(); - dense_col(krow) = it.value(); - - Index kmark = marker(krow); - if (kmark == jj) - continue; // krow visited 
before, go to the next nonzero - - dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent, - xplore, glu, nextl_col, krow, traits); - }// end for nonzeros in column jj - - } // end for column jj -} - -} // end namespace internal -} // end namespace Eigen - -#endif // SPARSELU_PANEL_DFS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pivotL.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pivotL.h index 2e49ef667..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -1,137 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of xpivotL.c file in SuperLU - - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_PIVOTL_H -#define SPARSELU_PIVOTL_H - -namespace Eigen { -namespace internal { - -/** - * \brief Performs the numerical pivotin on the current column of L, and the CDIV operation. 
- * - * Pivot policy : - * (1) Compute thresh = u * max_(i>=j) abs(A_ij); - * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN - * pivot row = k; - * ELSE IF abs(A_jj) >= thresh THEN - * pivot row = j; - * ELSE - * pivot row = m; - * - * Note: If you absolutely want to use a given pivot order, then set u=0.0. - * - * \param jcol The current column of L - * \param diagpivotthresh diagonal pivoting threshold - * \param[in,out] perm_r Row permutation (threshold pivoting) - * \param[in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' - * \param[out] pivrow The pivot row - * \param glu Global LU data - * \return 0 if success, i > 0 if U(i,i) is exactly zero - * - */ -template -Index SparseLUImpl::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu) -{ - - Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol - Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 - Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion - Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode - Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension - Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode - Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode - Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode - - // Determine the largest abs numerical value for partial pivoting - Index diagind = iperm_c(jcol); // diagonal index - RealScalar pivmax(-1.0); - Index pivptr = nsupc; - Index diag = emptyIdxLU; - RealScalar rtemp; - Index isub, icol, itemp, k; - for (isub = nsupc; isub < nsupr; ++isub) { - using std::abs; - rtemp = abs(lu_col_ptr[isub]); - if (rtemp > pivmax) 
{ - pivmax = rtemp; - pivptr = isub; - } - if (lsub_ptr[isub] == diagind) diag = isub; - } - - // Test for singularity - if ( pivmax <= RealScalar(0.0) ) { - // if pivmax == -1, the column is structurally empty, otherwise it is only numerically zero - pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr]; - perm_r(pivrow) = jcol; - return (jcol+1); - } - - RealScalar thresh = diagpivotthresh * pivmax; - - // Choose appropriate pivotal element - - { - // Test if the diagonal element can be used as a pivot (given the threshold value) - if (diag >= 0 ) - { - // Diagonal element exists - using std::abs; - rtemp = abs(lu_col_ptr[diag]); - if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag; - } - pivrow = lsub_ptr[pivptr]; - } - - // Record pivot row - perm_r(pivrow) = jcol; - // Interchange row subscripts - if (pivptr != nsupc ) - { - std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] ); - // Interchange numerical values as well, for the two rows in the whole snode - // such that L is indexed the same way as A - for (icol = 0; icol <= nsupc; icol++) - { - itemp = pivptr + icol * lda; - std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]); - } - } - // cdiv operations - Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc]; - for (k = nsupc+1; k < nsupr; k++) - lu_col_ptr[k] *= temp; - return 0; -} - -} // end namespace internal -} // end namespace Eigen - -#endif // SPARSELU_PIVOTL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pruneL.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pruneL.h index 66460d168..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -1,135 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU - - * -- SuperLU routine (version 2.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * November 15, 1997 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ -#ifndef SPARSELU_PRUNEL_H -#define SPARSELU_PRUNEL_H - -namespace Eigen { -namespace internal { - -/** - * \brief Prunes the L-structure. 
- * - * It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow" - * - * - * \param jcol The current column of L - * \param[in] perm_r Row permutation - * \param[out] pivrow The pivot row - * \param nseg Number of segments - * \param segrep - * \param repfnz - * \param[out] xprune - * \param glu Global LU data - * - */ -template -void SparseLUImpl::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu) -{ - // For each supernode-rep irep in U(*,j] - Index jsupno = glu.supno(jcol); - Index i,irep,irep1; - bool movnum, do_prune = false; - Index kmin = 0, kmax = 0, minloc, maxloc,krow; - for (i = 0; i < nseg; i++) - { - irep = segrep(i); - irep1 = irep + 1; - do_prune = false; - - // Don't prune with a zero U-segment - if (repfnz(irep) == emptyIdxLU) continue; - - // If a snode overlaps with the next panel, then the U-segment - // is fragmented into two parts -- irep and irep1. We should let - // pruning occur at the rep-column in irep1s snode. 
- if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune - - // If it has not been pruned & it has a nonz in row L(pivrow,i) - if (glu.supno(irep) != jsupno ) - { - if ( xprune (irep) >= glu.xlsub(irep1) ) - { - kmin = glu.xlsub(irep); - kmax = glu.xlsub(irep1) - 1; - for (krow = kmin; krow <= kmax; krow++) - { - if (glu.lsub(krow) == pivrow) - { - do_prune = true; - break; - } - } - } - - if (do_prune) - { - // do a quicksort-type partition - // movnum=true means that the num values have to be exchanged - movnum = false; - if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1 - movnum = true; - - while (kmin <= kmax) - { - if (perm_r(glu.lsub(kmax)) == emptyIdxLU) - kmax--; - else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU) - kmin++; - else - { - // kmin below pivrow (not yet pivoted), and kmax - // above pivrow: interchange the two suscripts - std::swap(glu.lsub(kmin), glu.lsub(kmax)); - - // If the supernode has only one column, then we - // only keep one set of subscripts. For any subscript - // intercnahge performed, similar interchange must be - // done on the numerical values. - if (movnum) - { - minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) ); - maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) ); - std::swap(glu.lusup(minloc), glu.lusup(maxloc)); - } - kmin++; - kmax--; - } - } // end while - - xprune(irep) = kmin; //Pruning - } // end if do_prune - } // end pruning - } // End for each U-segment -} - -} // end namespace internal -} // end namespace Eigen - -#endif // SPARSELU_PRUNEL_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_relax_snode.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_relax_snode.h index 58ec32e27..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -1,83 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2012 Désiré Nuentsa-Wakam -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* This file is a modified version of heap_relax_snode.c file in SuperLU - * -- SuperLU routine (version 3.0) -- - * Univ. of California Berkeley, Xerox Palo Alto Research Center, - * and Lawrence Berkeley National Lab. - * October 15, 2003 - * - * Copyright (c) 1994 by Xerox Corporation. All rights reserved. - * - * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY - * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. - * - * Permission is hereby granted to use or copy this program for any - * purpose, provided the above notices are retained on all copies. - * Permission to modify the code and to distribute modified code is - * granted, provided the above notices are retained, and a notice that - * the code was modified is included with the above copyright notice. - */ - -#ifndef SPARSELU_RELAX_SNODE_H -#define SPARSELU_RELAX_SNODE_H - -namespace Eigen { - -namespace internal { - -/** - * \brief Identify the initial relaxed supernodes - * - * This routine is applied to a column elimination tree. 
- * It assumes that the matrix has been reordered according to the postorder of the etree - * \param n the number of columns - * \param et elimination tree - * \param relax_columns Maximum number of columns allowed in a relaxed snode - * \param descendants Number of descendants of each node in the etree - * \param relax_end last column in a supernode - */ -template -void SparseLUImpl::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end) -{ - - // compute the number of descendants of each node in the etree - Index j, parent; - relax_end.setConstant(emptyIdxLU); - descendants.setZero(); - for (j = 0; j < n; j++) - { - parent = et(j); - if (parent != n) // not the dummy root - descendants(parent) += descendants(j) + 1; - } - // Identify the relaxed supernodes by postorder traversal of the etree - Index snode_start; // beginning of a snode - for (j = 0; j < n; ) - { - parent = et(j); - snode_start = j; - while ( parent != n && descendants(parent) < relax_columns ) - { - j = parent; - parent = et(j); - } - // Found a supernode in postordered etree, j is the last column - relax_end(snode_start) = j; // Record last column - j++; - // Search for a new leaf - while (descendants(j) != 0 && j < n) j++; - } // End postorder traversal of the etree - -} - -} // end namespace internal - -} // end namespace Eigen -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/CMakeLists.txt index f9ddf2bdb..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SparseQR_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SparseQR_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseQR/ COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/SparseQR.h b/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/SparseQR.h index 
a00bd5db1..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/SparseQR.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SparseQR/SparseQR.h @@ -1,714 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2012-2013 Desire Nuentsa -// Copyright (C) 2012-2014 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SPARSE_QR_H -#define EIGEN_SPARSE_QR_H - -namespace Eigen { - -template class SparseQR; -template struct SparseQRMatrixQReturnType; -template struct SparseQRMatrixQTransposeReturnType; -template struct SparseQR_QProduct; -namespace internal { - template struct traits > - { - typedef typename SparseQRType::MatrixType ReturnType; - typedef typename ReturnType::Index Index; - typedef typename ReturnType::StorageKind StorageKind; - }; - template struct traits > - { - typedef typename SparseQRType::MatrixType ReturnType; - }; - template struct traits > - { - typedef typename Derived::PlainObject ReturnType; - }; -} // End namespace internal - -/** - * \ingroup SparseQR_Module - * \class SparseQR - * \brief Sparse left-looking rank-revealing QR factorization - * - * This class implements a left-looking rank-revealing QR decomposition - * of sparse matrices. When a column has a norm less than a given tolerance - * it is implicitly permuted to the end. The QR factorization thus obtained is - * given by A*P = Q*R where R is upper triangular or trapezoidal. - * - * P is the column permutation which is the product of the fill-reducing and the - * rank-revealing permutations. Use colsPermutation() to get it. - * - * Q is the orthogonal matrix represented as products of Householder reflectors. - * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. 
- * You can then apply it to a vector. - * - * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient. - * matrixR().topLeftCorner(rank(), rank()) always returns a triangular factor of full rank. - * - * \tparam _MatrixType The type of the sparse matrix A, must be a column-major SparseMatrix<> - * \tparam _OrderingType The fill-reducing ordering method. See the \link OrderingMethods_Module - * OrderingMethods \endlink module for the list of built-in and external ordering methods. - * - * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). - * - */ -template -class SparseQR -{ - public: - typedef _MatrixType MatrixType; - typedef _OrderingType OrderingType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef SparseMatrix QRMatrixType; - typedef Matrix IndexVector; - typedef Matrix ScalarVector; - typedef PermutationMatrix PermutationType; - public: - SparseQR () : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) - { } - - /** Construct a QR factorization of the matrix \a mat. - * - * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). - * - * \sa compute() - */ - SparseQR(const MatrixType& mat) : m_isInitialized(false), m_analysisIsok(false), m_lastError(""), m_useDefaultThreshold(true),m_isQSorted(false),m_isEtreeOk(false) - { - compute(mat); - } - - /** Computes the QR factorization of the sparse matrix \a mat. - * - * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). 
- * - * \sa analyzePattern(), factorize() - */ - void compute(const MatrixType& mat) - { - analyzePattern(mat); - factorize(mat); - } - void analyzePattern(const MatrixType& mat); - void factorize(const MatrixType& mat); - - /** \returns the number of rows of the represented matrix. - */ - inline Index rows() const { return m_pmat.rows(); } - - /** \returns the number of columns of the represented matrix. - */ - inline Index cols() const { return m_pmat.cols();} - - /** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization. - */ - const QRMatrixType& matrixR() const { return m_R; } - - /** \returns the number of non linearly dependent columns as determined by the pivoting threshold. - * - * \sa setPivotThreshold() - */ - Index rank() const - { - eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); - return m_nonzeropivots; - } - - /** \returns an expression of the matrix Q as products of sparse Householder reflectors. - * The common usage of this function is to apply it to a dense matrix or vector - * \code - * VectorXd B1, B2; - * // Initialize B1 - * B2 = matrixQ() * B1; - * \endcode - * - * To get a plain SparseMatrix representation of Q: - * \code - * SparseMatrix Q; - * Q = SparseQR >(A).matrixQ(); - * \endcode - * Internally, this call simply performs a sparse product between the matrix Q - * and a sparse identity matrix. However, due to the fact that the sparse - * reflectors are stored unsorted, two transpositions are needed to sort - * them before performing the product. - */ - SparseQRMatrixQReturnType matrixQ() const - { return SparseQRMatrixQReturnType(*this); } - - /** \returns a const reference to the column permutation P that was applied to A such that A*P = Q*R - * It is the combination of the fill-in reducing permutation and numerical column pivoting. 
- */ - const PermutationType& colsPermutation() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_outputPerm_c; - } - - /** \returns A string describing the type of error. - * This method is provided to ease debugging, not to handle errors. - */ - std::string lastErrorMessage() const { return m_lastError; } - - /** \internal */ - template - bool _solve(const MatrixBase &B, MatrixBase &dest) const - { - eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); - eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); - - Index rank = this->rank(); - - // Compute Q^T * b; - typename Dest::PlainObject y, b; - y = this->matrixQ().transpose() * B; - b = y; - - // Solve with the triangular matrix R - y.resize((std::max)(cols(),Index(y.rows())),y.cols()); - y.topRows(rank) = this->matrixR().topLeftCorner(rank, rank).template triangularView().solve(b.topRows(rank)); - y.bottomRows(y.rows()-rank).setZero(); - - // Apply the column permutation - if (m_perm_c.size()) dest = colsPermutation() * y.topRows(cols()); - else dest = y.topRows(cols()); - - m_info = Success; - return true; - } - - - /** Sets the threshold that is used to determine linearly dependent columns during the factorization. - * - * In practice, if during the factorization the norm of the column that has to be eliminated is below - * this threshold, then the entire column is treated as zero, and it is moved at the end. - */ - void setPivotThreshold(const RealScalar& threshold) - { - m_useDefaultThreshold = false; - m_threshold = threshold; - } - - /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template - inline const internal::solve_retval solve(const MatrixBase& B) const - { - eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); - eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); - return internal::solve_retval(*this, B.derived()); - } - template - inline const internal::sparse_solve_retval solve(const SparseMatrixBase& B) const - { - eigen_assert(m_isInitialized && "The factorization should be called first, use compute()"); - eigen_assert(this->rows() == B.rows() && "SparseQR::solve() : invalid number of rows in the right hand side matrix"); - return internal::sparse_solve_retval(*this, B.derived()); - } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was successful, - * \c NumericalIssue if the QR factorization reports a numerical problem - * \c InvalidInput if the input matrix is invalid - * - * \sa iparm() - */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - protected: - inline void sort_matrix_Q() - { - if(this->m_isQSorted) return; - // The matrix Q is sorted during the transposition - SparseMatrix mQrm(this->m_Q); - this->m_Q = mQrm; - this->m_isQSorted = true; - } - - - protected: - bool m_isInitialized; - bool m_analysisIsok; - bool m_factorizationIsok; - mutable ComputationInfo m_info; - std::string m_lastError; - QRMatrixType m_pmat; // Temporary matrix - QRMatrixType m_R; // The triangular factor matrix - QRMatrixType m_Q; // The orthogonal reflectors - ScalarVector m_hcoeffs; // The Householder coefficients - PermutationType m_perm_c; // Fill-reducing Column permutation - PermutationType m_pivotperm; // The permutation for rank revealing - PermutationType m_outputPerm_c; // The final column permutation - RealScalar m_threshold; // Threshold to determine null 
Householder reflections - bool m_useDefaultThreshold; // Use default threshold - Index m_nonzeropivots; // Number of non zero pivots found - IndexVector m_etree; // Column elimination tree - IndexVector m_firstRowElt; // First element in each row - bool m_isQSorted; // whether Q is sorted or not - bool m_isEtreeOk; // whether the elimination tree match the initial input matrix - - template friend struct SparseQR_QProduct; - template friend struct SparseQRMatrixQReturnType; - -}; - -/** \brief Preprocessing step of a QR factorization - * - * \warning The matrix \a mat must be in compressed mode (see SparseMatrix::makeCompressed()). - * - * In this step, the fill-reducing permutation is computed and applied to the columns of A - * and the column elimination tree is computed as well. Only the sparsity pattern of \a mat is exploited. - * - * \note In this step it is assumed that there is no empty row in the matrix \a mat. - */ -template -void SparseQR::analyzePattern(const MatrixType& mat) -{ - eigen_assert(mat.isCompressed() && "SparseQR requires a sparse matrix in compressed mode. 
Call .makeCompressed() before passing it to SparseQR"); - // Copy to a column major matrix if the input is rowmajor - typename internal::conditional::type matCpy(mat); - // Compute the column fill reducing ordering - OrderingType ord; - ord(matCpy, m_perm_c); - Index n = mat.cols(); - Index m = mat.rows(); - Index diagSize = (std::min)(m,n); - - if (!m_perm_c.size()) - { - m_perm_c.resize(n); - m_perm_c.indices().setLinSpaced(n, 0,n-1); - } - - // Compute the column elimination tree of the permuted matrix - m_outputPerm_c = m_perm_c.inverse(); - internal::coletree(matCpy, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); - m_isEtreeOk = true; - - m_R.resize(m, n); - m_Q.resize(m, diagSize); - - // Allocate space for nonzero elements : rough estimation - m_R.reserve(2*mat.nonZeros()); //FIXME Get a more accurate estimation through symbolic factorization with the etree - m_Q.reserve(2*mat.nonZeros()); - m_hcoeffs.resize(diagSize); - m_analysisIsok = true; -} - -/** \brief Performs the numerical QR factorization of the input matrix - * - * The function SparseQR::analyzePattern(const MatrixType&) must have been called beforehand with - * a matrix having the same sparsity pattern than \a mat. 
- * - * \param mat The sparse column-major matrix - */ -template -void SparseQR::factorize(const MatrixType& mat) -{ - using std::abs; - using std::max; - - eigen_assert(m_analysisIsok && "analyzePattern() should be called before this step"); - Index m = mat.rows(); - Index n = mat.cols(); - Index diagSize = (std::min)(m,n); - IndexVector mark((std::max)(m,n)); mark.setConstant(-1); // Record the visited nodes - IndexVector Ridx(n), Qidx(m); // Store temporarily the row indexes for the current column of R and Q - Index nzcolR, nzcolQ; // Number of nonzero for the current column of R and Q - ScalarVector tval(m); // The dense vector used to compute the current column - RealScalar pivotThreshold = m_threshold; - - m_R.setZero(); - m_Q.setZero(); - m_pmat = mat; - if(!m_isEtreeOk) - { - m_outputPerm_c = m_perm_c.inverse(); - internal::coletree(m_pmat, m_etree, m_firstRowElt, m_outputPerm_c.indices().data()); - m_isEtreeOk = true; - } - - m_pmat.uncompress(); // To have the innerNonZeroPtr allocated - - // Apply the fill-in reducing permutation lazily: - { - // If the input is row major, copy the original column indices, - // otherwise directly use the input matrix - // - IndexVector originalOuterIndicesCpy; - const Index *originalOuterIndices = mat.outerIndexPtr(); - if(MatrixType::IsRowMajor) - { - originalOuterIndicesCpy = IndexVector::Map(m_pmat.outerIndexPtr(),n+1); - originalOuterIndices = originalOuterIndicesCpy.data(); - } - - for (int i = 0; i < n; i++) - { - Index p = m_perm_c.size() ? m_perm_c.indices()(i) : i; - m_pmat.outerIndexPtr()[p] = originalOuterIndices[i]; - m_pmat.innerNonZeroPtr()[p] = originalOuterIndices[i+1] - originalOuterIndices[i]; - } - } - - /* Compute the default threshold as in MatLab, see: - * Tim Davis, "Algorithm 915, SuiteSparseQR: Multifrontal Multithreaded Rank-Revealing - * Sparse QR Factorization, ACM Trans. on Math. Soft. 
38(1), 2011, Page 8:3 - */ - if(m_useDefaultThreshold) - { - RealScalar max2Norm = 0.0; - for (int j = 0; j < n; j++) max2Norm = (max)(max2Norm, m_pmat.col(j).norm()); - if(max2Norm==RealScalar(0)) - max2Norm = RealScalar(1); - pivotThreshold = 20 * (m + n) * max2Norm * NumTraits::epsilon(); - } - - // Initialize the numerical permutation - m_pivotperm.setIdentity(n); - - Index nonzeroCol = 0; // Record the number of valid pivots - m_Q.startVec(0); - - // Left looking rank-revealing QR factorization: compute a column of R and Q at a time - for (Index col = 0; col < n; ++col) - { - mark.setConstant(-1); - m_R.startVec(col); - mark(nonzeroCol) = col; - Qidx(0) = nonzeroCol; - nzcolR = 0; nzcolQ = 1; - bool found_diag = nonzeroCol>=m; - tval.setZero(); - - // Symbolic factorization: find the nonzero locations of the column k of the factors R and Q, i.e., - // all the nodes (with indexes lower than rank) reachable through the column elimination tree (etree) rooted at node k. - // Note: if the diagonal entry does not exist, then its contribution must be explicitly added, - // thus the trick with found_diag that permits to do one more iteration on the diagonal element if this one has not been found. 
- for (typename QRMatrixType::InnerIterator itp(m_pmat, col); itp || !found_diag; ++itp) - { - Index curIdx = nonzeroCol; - if(itp) curIdx = itp.row(); - if(curIdx == nonzeroCol) found_diag = true; - - // Get the nonzeros indexes of the current column of R - Index st = m_firstRowElt(curIdx); // The traversal of the etree starts here - if (st < 0 ) - { - m_lastError = "Empty row found during numerical factorization"; - m_info = InvalidInput; - return; - } - - // Traverse the etree - Index bi = nzcolR; - for (; mark(st) != col; st = m_etree(st)) - { - Ridx(nzcolR) = st; // Add this row to the list, - mark(st) = col; // and mark this row as visited - nzcolR++; - } - - // Reverse the list to get the topological ordering - Index nt = nzcolR-bi; - for(Index i = 0; i < nt/2; i++) std::swap(Ridx(bi+i), Ridx(nzcolR-i-1)); - - // Copy the current (curIdx,pcol) value of the input matrix - if(itp) tval(curIdx) = itp.value(); - else tval(curIdx) = Scalar(0); - - // Compute the pattern of Q(:,k) - if(curIdx > nonzeroCol && mark(curIdx) != col ) - { - Qidx(nzcolQ) = curIdx; // Add this row to the pattern of Q, - mark(curIdx) = col; // and mark it as visited - nzcolQ++; - } - } - - // Browse all the indexes of R(:,col) in reverse order - for (Index i = nzcolR-1; i >= 0; i--) - { - Index curIdx = Ridx(i); - - // Apply the curIdx-th householder vector to the current column (temporarily stored into tval) - Scalar tdot(0); - - // First compute q' * tval - tdot = m_Q.col(curIdx).dot(tval); - - tdot *= m_hcoeffs(curIdx); - - // Then update tval = tval - q * tau - // FIXME: tval -= tdot * m_Q.col(curIdx) should amount to the same (need to check/add support for efficient "dense ?= sparse") - for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq) - tval(itq.row()) -= itq.value() * tdot; - - // Detect fill-in for the current column of Q - if(m_etree(Ridx(i)) == nonzeroCol) - { - for (typename QRMatrixType::InnerIterator itq(m_Q, curIdx); itq; ++itq) - { - Index iQ = 
itq.row(); - if (mark(iQ) != col) - { - Qidx(nzcolQ++) = iQ; // Add this row to the pattern of Q, - mark(iQ) = col; // and mark it as visited - } - } - } - } // End update current column - - Scalar tau = 0; - RealScalar beta = 0; - - if(nonzeroCol < diagSize) - { - // Compute the Householder reflection that eliminate the current column - // FIXME this step should call the Householder module. - Scalar c0 = nzcolQ ? tval(Qidx(0)) : Scalar(0); - - // First, the squared norm of Q((col+1):m, col) - RealScalar sqrNorm = 0.; - for (Index itq = 1; itq < nzcolQ; ++itq) sqrNorm += numext::abs2(tval(Qidx(itq))); - if(sqrNorm == RealScalar(0) && numext::imag(c0) == RealScalar(0)) - { - beta = numext::real(c0); - tval(Qidx(0)) = 1; - } - else - { - using std::sqrt; - beta = sqrt(numext::abs2(c0) + sqrNorm); - if(numext::real(c0) >= RealScalar(0)) - beta = -beta; - tval(Qidx(0)) = 1; - for (Index itq = 1; itq < nzcolQ; ++itq) - tval(Qidx(itq)) /= (c0 - beta); - tau = numext::conj((beta-c0) / beta); - - } - } - - // Insert values in R - for (Index i = nzcolR-1; i >= 0; i--) - { - Index curIdx = Ridx(i); - if(curIdx < nonzeroCol) - { - m_R.insertBackByOuterInnerUnordered(col, curIdx) = tval(curIdx); - tval(curIdx) = Scalar(0.); - } - } - - if(nonzeroCol < diagSize && abs(beta) >= pivotThreshold) - { - m_R.insertBackByOuterInner(col, nonzeroCol) = beta; - // The householder coefficient - m_hcoeffs(nonzeroCol) = tau; - // Record the householder reflections - for (Index itq = 0; itq < nzcolQ; ++itq) - { - Index iQ = Qidx(itq); - m_Q.insertBackByOuterInnerUnordered(nonzeroCol,iQ) = tval(iQ); - tval(iQ) = Scalar(0.); - } - nonzeroCol++; - if(nonzeroCol -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef SparseQR<_MatrixType,OrderingType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef SparseQR<_MatrixType, 
OrderingType> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec, Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; -} // end namespace internal - -template -struct SparseQR_QProduct : ReturnByValue > -{ - typedef typename SparseQRType::QRMatrixType MatrixType; - typedef typename SparseQRType::Scalar Scalar; - typedef typename SparseQRType::Index Index; - // Get the references - SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) : - m_qr(qr),m_other(other),m_transpose(transpose) {} - inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); } - inline Index cols() const { return m_other.cols(); } - - // Assign to a vector - template - void evalTo(DesType& res) const - { - Index m = m_qr.rows(); - Index n = m_qr.cols(); - Index diagSize = (std::min)(m,n); - res = m_other; - if (m_transpose) - { - eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes"); - //Compute res = Q' * other column by column - for(Index j = 0; j < res.cols(); j++){ - for (Index k = 0; k < diagSize; k++) - { - Scalar tau = Scalar(0); - tau = m_qr.m_Q.col(k).dot(res.col(j)); - if(tau==Scalar(0)) continue; - tau = tau * m_qr.m_hcoeffs(k); - res.col(j) -= tau * m_qr.m_Q.col(k); - } - } - } - else - { - eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes"); - // Compute res = Q * other column by column - for(Index j = 0; j < res.cols(); j++) - { - for (Index k = diagSize-1; k >=0; k--) - { - Scalar tau = Scalar(0); - tau = m_qr.m_Q.col(k).dot(res.col(j)); - if(tau==Scalar(0)) continue; - tau = tau * m_qr.m_hcoeffs(k); - res.col(j) -= tau * m_qr.m_Q.col(k); - } - } - } - } - - const SparseQRType& m_qr; - const Derived& m_other; - bool m_transpose; -}; - -template -struct SparseQRMatrixQReturnType : public EigenBase > -{ - typedef typename SparseQRType::Index Index; - typedef typename SparseQRType::Scalar Scalar; - typedef Matrix DenseMatrix; - 
SparseQRMatrixQReturnType(const SparseQRType& qr) : m_qr(qr) {} - template - SparseQR_QProduct operator*(const MatrixBase& other) - { - return SparseQR_QProduct(m_qr,other.derived(),false); - } - SparseQRMatrixQTransposeReturnType adjoint() const - { - return SparseQRMatrixQTransposeReturnType(m_qr); - } - inline Index rows() const { return m_qr.rows(); } - inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); } - // To use for operations with the transpose of Q - SparseQRMatrixQTransposeReturnType transpose() const - { - return SparseQRMatrixQTransposeReturnType(m_qr); - } - template void evalTo(MatrixBase& dest) const - { - dest.derived() = m_qr.matrixQ() * Dest::Identity(m_qr.rows(), m_qr.rows()); - } - template void evalTo(SparseMatrixBase& dest) const - { - Dest idMat(m_qr.rows(), m_qr.rows()); - idMat.setIdentity(); - // Sort the sparse householder reflectors if needed - const_cast(&m_qr)->sort_matrix_Q(); - dest.derived() = SparseQR_QProduct(m_qr, idMat, false); - } - - const SparseQRType& m_qr; -}; - -template -struct SparseQRMatrixQTransposeReturnType -{ - SparseQRMatrixQTransposeReturnType(const SparseQRType& qr) : m_qr(qr) {} - template - SparseQR_QProduct operator*(const MatrixBase& other) - { - return SparseQR_QProduct(m_qr,other.derived(), true); - } - const SparseQRType& m_qr; -}; - -} // end namespace Eigen - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/CMakeLists.txt index 0f094f637..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_StlSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_StlSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/StlSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdDeque.h b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdDeque.h index 
69a46b2b8..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdDeque.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdDeque.h @@ -1,126 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STDDEQUE_H -#define EIGEN_STDDEQUE_H - -#include "details.h" - -/** - * This section contains a convenience MACRO which allows an easy specialization of - * std::deque such that for data types with alignment issues the correct allocator - * is used automatically. - */ -#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) \ -namespace std \ -{ \ - template<> \ - class deque<__VA_ARGS__, std::allocator<__VA_ARGS__> > \ - : public deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ - { \ - typedef deque<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > deque_base; \ - public: \ - typedef __VA_ARGS__ value_type; \ - typedef deque_base::allocator_type allocator_type; \ - typedef deque_base::size_type size_type; \ - typedef deque_base::iterator iterator; \ - explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \ - template \ - deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : deque_base(first, last, a) {} \ - deque(const deque& c) : deque_base(c) {} \ - explicit deque(size_type num, const value_type& val = value_type()) : deque_base(num, val) {} \ - deque(iterator start, iterator end) : deque_base(start, end) {} \ - deque& operator=(const deque& x) { \ - deque_base::operator=(x); \ - return *this; \ - } \ - }; \ -} - -// check whether we really need the std::deque specialization -#if !(defined(_GLIBCXX_DEQUE) && (!EIGEN_GNUC_AT_LEAST(4,1))) 
/* Note that before gcc-4.1 we already have: std::deque::resize(size_type,const T&). */ - -namespace std { - -#define EIGEN_STD_DEQUE_SPECIALIZATION_BODY \ - public: \ - typedef T value_type; \ - typedef typename deque_base::allocator_type allocator_type; \ - typedef typename deque_base::size_type size_type; \ - typedef typename deque_base::iterator iterator; \ - typedef typename deque_base::const_iterator const_iterator; \ - explicit deque(const allocator_type& a = allocator_type()) : deque_base(a) {} \ - template \ - deque(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \ - : deque_base(first, last, a) {} \ - deque(const deque& c) : deque_base(c) {} \ - explicit deque(size_type num, const value_type& val = value_type()) : deque_base(num, val) {} \ - deque(iterator start, iterator end) : deque_base(start, end) {} \ - deque& operator=(const deque& x) { \ - deque_base::operator=(x); \ - return *this; \ - } - - template - class deque > - : public deque > -{ - typedef deque > deque_base; - EIGEN_STD_DEQUE_SPECIALIZATION_BODY - - void resize(size_type new_size) - { resize(new_size, T()); } - -#if defined(_DEQUE_) - // workaround MSVC std::deque implementation - void resize(size_type new_size, const value_type& x) - { - if (deque_base::size() < new_size) - deque_base::_Insert_n(deque_base::end(), new_size - deque_base::size(), x); - else if (new_size < deque_base::size()) - deque_base::erase(deque_base::begin() + new_size, deque_base::end()); - } - void push_back(const value_type& x) - { deque_base::push_back(x); } - void push_front(const value_type& x) - { deque_base::push_front(x); } - using deque_base::insert; - iterator insert(const_iterator position, const value_type& x) - { return deque_base::insert(position,x); } - void insert(const_iterator position, size_type new_size, const value_type& x) - { deque_base::insert(position, new_size, x); } -#elif defined(_GLIBCXX_DEQUE) && EIGEN_GNUC_AT_LEAST(4,2) - // workaround GCC 
std::deque implementation - void resize(size_type new_size, const value_type& x) - { - if (new_size < deque_base::size()) - deque_base::_M_erase_at_end(this->_M_impl._M_start + new_size); - else - deque_base::insert(deque_base::end(), new_size - deque_base::size(), x); - } -#else - // either GCC 4.1 or non-GCC - // default implementation which should always work. - void resize(size_type new_size, const value_type& x) - { - if (new_size < deque_base::size()) - deque_base::erase(deque_base::begin() + new_size, deque_base::end()); - else if (new_size > deque_base::size()) - deque_base::insert(deque_base::end(), new_size - deque_base::size(), x); - } -#endif - }; -} - -#endif // check whether specialization is actually required - -#endif // EIGEN_STDDEQUE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdList.h b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdList.h index 050c2373e..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdList.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdList.h @@ -1,106 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STDLIST_H -#define EIGEN_STDLIST_H - -#include "details.h" - -/** - * This section contains a convenience MACRO which allows an easy specialization of - * std::list such that for data types with alignment issues the correct allocator - * is used automatically. - */ -#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) 
\ -namespace std \ -{ \ - template<> \ - class list<__VA_ARGS__, std::allocator<__VA_ARGS__> > \ - : public list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ - { \ - typedef list<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > list_base; \ - public: \ - typedef __VA_ARGS__ value_type; \ - typedef list_base::allocator_type allocator_type; \ - typedef list_base::size_type size_type; \ - typedef list_base::iterator iterator; \ - explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \ - template \ - list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : list_base(first, last, a) {} \ - list(const list& c) : list_base(c) {} \ - explicit list(size_type num, const value_type& val = value_type()) : list_base(num, val) {} \ - list(iterator start, iterator end) : list_base(start, end) {} \ - list& operator=(const list& x) { \ - list_base::operator=(x); \ - return *this; \ - } \ - }; \ -} - -// check whether we really need the std::vector specialization -#if !(defined(_GLIBCXX_VECTOR) && (!EIGEN_GNUC_AT_LEAST(4,1))) /* Note that before gcc-4.1 we already have: std::list::resize(size_type,const T&). 
*/ - -namespace std -{ - -#define EIGEN_STD_LIST_SPECIALIZATION_BODY \ - public: \ - typedef T value_type; \ - typedef typename list_base::allocator_type allocator_type; \ - typedef typename list_base::size_type size_type; \ - typedef typename list_base::iterator iterator; \ - typedef typename list_base::const_iterator const_iterator; \ - explicit list(const allocator_type& a = allocator_type()) : list_base(a) {} \ - template \ - list(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \ - : list_base(first, last, a) {} \ - list(const list& c) : list_base(c) {} \ - explicit list(size_type num, const value_type& val = value_type()) : list_base(num, val) {} \ - list(iterator start, iterator end) : list_base(start, end) {} \ - list& operator=(const list& x) { \ - list_base::operator=(x); \ - return *this; \ - } - - template - class list > - : public list > - { - typedef list > list_base; - EIGEN_STD_LIST_SPECIALIZATION_BODY - - void resize(size_type new_size) - { resize(new_size, T()); } - - void resize(size_type new_size, const value_type& x) - { - if (list_base::size() < new_size) - list_base::insert(list_base::end(), new_size - list_base::size(), x); - else - while (new_size < list_base::size()) list_base::pop_back(); - } - -#if defined(_LIST_) - // workaround MSVC std::list implementation - void push_back(const value_type& x) - { list_base::push_back(x); } - using list_base::insert; - iterator insert(const_iterator position, const value_type& x) - { return list_base::insert(position,x); } - void insert(const_iterator position, size_type new_size, const value_type& x) - { list_base::insert(position, new_size, x); } -#endif - }; -} - -#endif // check whether specialization is actually required - -#endif // EIGEN_STDLIST_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdVector.h b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdVector.h index 611664a2e..e69de29bb 100644 --- 
a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdVector.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/StdVector.h @@ -1,126 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STDVECTOR_H -#define EIGEN_STDVECTOR_H - -#include "details.h" - -/** - * This section contains a convenience MACRO which allows an easy specialization of - * std::vector such that for data types with alignment issues the correct allocator - * is used automatically. - */ -#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) \ -namespace std \ -{ \ - template<> \ - class vector<__VA_ARGS__, std::allocator<__VA_ARGS__> > \ - : public vector<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > \ - { \ - typedef vector<__VA_ARGS__, EIGEN_ALIGNED_ALLOCATOR<__VA_ARGS__> > vector_base; \ - public: \ - typedef __VA_ARGS__ value_type; \ - typedef vector_base::allocator_type allocator_type; \ - typedef vector_base::size_type size_type; \ - typedef vector_base::iterator iterator; \ - explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {} \ - template \ - vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) : vector_base(first, last, a) {} \ - vector(const vector& c) : vector_base(c) {} \ - explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \ - vector(iterator start, iterator end) : vector_base(start, end) {} \ - vector& operator=(const vector& x) { \ - vector_base::operator=(x); \ - return *this; \ - } \ - }; \ -} - -namespace std { - -#define EIGEN_STD_VECTOR_SPECIALIZATION_BODY \ - public: \ - typedef T value_type; \ - typedef typename 
vector_base::allocator_type allocator_type; \ - typedef typename vector_base::size_type size_type; \ - typedef typename vector_base::iterator iterator; \ - typedef typename vector_base::const_iterator const_iterator; \ - explicit vector(const allocator_type& a = allocator_type()) : vector_base(a) {} \ - template \ - vector(InputIterator first, InputIterator last, const allocator_type& a = allocator_type()) \ - : vector_base(first, last, a) {} \ - vector(const vector& c) : vector_base(c) {} \ - explicit vector(size_type num, const value_type& val = value_type()) : vector_base(num, val) {} \ - vector(iterator start, iterator end) : vector_base(start, end) {} \ - vector& operator=(const vector& x) { \ - vector_base::operator=(x); \ - return *this; \ - } - - template - class vector > - : public vector > -{ - typedef vector > vector_base; - EIGEN_STD_VECTOR_SPECIALIZATION_BODY - - void resize(size_type new_size) - { resize(new_size, T()); } - -#if defined(_VECTOR_) - // workaround MSVC std::vector implementation - void resize(size_type new_size, const value_type& x) - { - if (vector_base::size() < new_size) - vector_base::_Insert_n(vector_base::end(), new_size - vector_base::size(), x); - else if (new_size < vector_base::size()) - vector_base::erase(vector_base::begin() + new_size, vector_base::end()); - } - void push_back(const value_type& x) - { vector_base::push_back(x); } - using vector_base::insert; - iterator insert(const_iterator position, const value_type& x) - { return vector_base::insert(position,x); } - void insert(const_iterator position, size_type new_size, const value_type& x) - { vector_base::insert(position, new_size, x); } -#elif defined(_GLIBCXX_VECTOR) && (!(EIGEN_GNUC_AT_LEAST(4,1))) - /* Note that before gcc-4.1 we already have: std::vector::resize(size_type,const T&). - * However, this specialization is still needed to make the above EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION trick to work. 
*/ - void resize(size_type new_size, const value_type& x) - { - vector_base::resize(new_size,x); - } -#elif defined(_GLIBCXX_VECTOR) && EIGEN_GNUC_AT_LEAST(4,2) - // workaround GCC std::vector implementation - void resize(size_type new_size, const value_type& x) - { - if (new_size < vector_base::size()) - vector_base::_M_erase_at_end(this->_M_impl._M_start + new_size); - else - vector_base::insert(vector_base::end(), new_size - vector_base::size(), x); - } -#else - // either GCC 4.1 or non-GCC - // default implementation which should always work. - void resize(size_type new_size, const value_type& x) - { - if (new_size < vector_base::size()) - vector_base::erase(vector_base::begin() + new_size, vector_base::end()); - else if (new_size > vector_base::size()) - vector_base::insert(vector_base::end(), new_size - vector_base::size(), x); - } -#endif - }; -} - -#endif // EIGEN_STDVECTOR_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/details.h b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/details.h index d8debc7c4..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/details.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/StlSupport/details.h @@ -1,84 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// Copyright (C) 2009 Hauke Heibel -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_STL_DETAILS_H -#define EIGEN_STL_DETAILS_H - -#ifndef EIGEN_ALIGNED_ALLOCATOR - #define EIGEN_ALIGNED_ALLOCATOR Eigen::aligned_allocator -#endif - -namespace Eigen { - - // This one is needed to prevent reimplementing the whole std::vector. 
- template - class aligned_allocator_indirection : public EIGEN_ALIGNED_ALLOCATOR - { - public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; - - template - struct rebind - { - typedef aligned_allocator_indirection other; - }; - - aligned_allocator_indirection() {} - aligned_allocator_indirection(const aligned_allocator_indirection& ) : EIGEN_ALIGNED_ALLOCATOR() {} - aligned_allocator_indirection(const EIGEN_ALIGNED_ALLOCATOR& ) {} - template - aligned_allocator_indirection(const aligned_allocator_indirection& ) {} - template - aligned_allocator_indirection(const EIGEN_ALIGNED_ALLOCATOR& ) {} - ~aligned_allocator_indirection() {} - }; - -#ifdef _MSC_VER - - // sometimes, MSVC detects, at compile time, that the argument x - // in std::vector::resize(size_t s,T x) won't be aligned and generate an error - // even if this function is never called. Whence this little wrapper. 
-#define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) \ - typename Eigen::internal::conditional< \ - Eigen::internal::is_arithmetic::value, \ - T, \ - Eigen::internal::workaround_msvc_stl_support \ - >::type - - namespace internal { - template struct workaround_msvc_stl_support : public T - { - inline workaround_msvc_stl_support() : T() {} - inline workaround_msvc_stl_support(const T& other) : T(other) {} - inline operator T& () { return *static_cast(this); } - inline operator const T& () const { return *static_cast(this); } - template - inline T& operator=(const OtherT& other) - { T::operator=(other); return *this; } - inline workaround_msvc_stl_support& operator=(const workaround_msvc_stl_support& other) - { T::operator=(other); return *this; } - }; - } - -#else - -#define EIGEN_WORKAROUND_MSVC_STL_SUPPORT(T) T - -#endif - -} - -#endif // EIGEN_STL_DETAILS_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/CMakeLists.txt index b28ebe583..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SuperLUSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SuperLUSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SuperLUSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/SuperLUSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/SuperLUSupport.h index bcb355760..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -1,1026 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SUPERLUSUPPORT_H -#define EIGEN_SUPERLUSUPPORT_H - -namespace Eigen { - -#define DECL_GSSVX(PREFIX,FLOATTYPE,KEYTYPE) \ - extern "C" { \ - typedef struct { FLOATTYPE for_lu; FLOATTYPE total_needed; int expansions; } PREFIX##mem_usage_t; \ - extern void PREFIX##gssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ - char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ - void *, int, SuperMatrix *, SuperMatrix *, \ - FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, FLOATTYPE *, \ - PREFIX##mem_usage_t *, SuperLUStat_t *, int *); \ - } \ - inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A, \ - int *perm_c, int *perm_r, int *etree, char *equed, \ - FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L, \ - SuperMatrix *U, void *work, int lwork, \ - SuperMatrix *B, SuperMatrix *X, \ - FLOATTYPE *recip_pivot_growth, \ - FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \ - SuperLUStat_t *stats, int *info, KEYTYPE) { \ - PREFIX##mem_usage_t mem_usage; \ - PREFIX##gssvx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ - U, work, lwork, B, X, recip_pivot_growth, rcond, \ - ferr, berr, &mem_usage, stats, info); \ - return mem_usage.for_lu; /* bytes used by the factor storage */ \ - } - -DECL_GSSVX(s,float,float) -DECL_GSSVX(c,float,std::complex) -DECL_GSSVX(d,double,double) -DECL_GSSVX(z,double,std::complex) - -#ifdef MILU_ALPHA -#define EIGEN_SUPERLU_HAS_ILU -#endif - -#ifdef EIGEN_SUPERLU_HAS_ILU - -// similarly for the incomplete factorization using gsisx -#define DECL_GSISX(PREFIX,FLOATTYPE,KEYTYPE) \ - extern "C" { \ - extern void PREFIX##gsisx(superlu_options_t *, SuperMatrix *, int *, int *, int *, \ - char *, FLOATTYPE *, FLOATTYPE *, SuperMatrix *, SuperMatrix *, \ - void *, int, SuperMatrix *, SuperMatrix *, FLOATTYPE *, FLOATTYPE *, \ - PREFIX##mem_usage_t *, SuperLUStat_t *, int *); \ - } \ - inline float 
SuperLU_gsisx(superlu_options_t *options, SuperMatrix *A, \ - int *perm_c, int *perm_r, int *etree, char *equed, \ - FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L, \ - SuperMatrix *U, void *work, int lwork, \ - SuperMatrix *B, SuperMatrix *X, \ - FLOATTYPE *recip_pivot_growth, \ - FLOATTYPE *rcond, \ - SuperLUStat_t *stats, int *info, KEYTYPE) { \ - PREFIX##mem_usage_t mem_usage; \ - PREFIX##gsisx(options, A, perm_c, perm_r, etree, equed, R, C, L, \ - U, work, lwork, B, X, recip_pivot_growth, rcond, \ - &mem_usage, stats, info); \ - return mem_usage.for_lu; /* bytes used by the factor storage */ \ - } - -DECL_GSISX(s,float,float) -DECL_GSISX(c,float,std::complex) -DECL_GSISX(d,double,double) -DECL_GSISX(z,double,std::complex) - -#endif - -template -struct SluMatrixMapHelper; - -/** \internal - * - * A wrapper class for SuperLU matrices. It supports only compressed sparse matrices - * and dense matrices. Supernodal and other fancy format are not supported by this wrapper. - * - * This wrapper class mainly aims to avoids the need of dynamic allocation of the storage structure. 
- */ -struct SluMatrix : SuperMatrix -{ - SluMatrix() - { - Store = &storage; - } - - SluMatrix(const SluMatrix& other) - : SuperMatrix(other) - { - Store = &storage; - storage = other.storage; - } - - SluMatrix& operator=(const SluMatrix& other) - { - SuperMatrix::operator=(static_cast(other)); - Store = &storage; - storage = other.storage; - return *this; - } - - struct - { - union {int nnz;int lda;}; - void *values; - int *innerInd; - int *outerInd; - } storage; - - void setStorageType(Stype_t t) - { - Stype = t; - if (t==SLU_NC || t==SLU_NR || t==SLU_DN) - Store = &storage; - else - { - eigen_assert(false && "storage type not supported"); - Store = 0; - } - } - - template - void setScalarType() - { - if (internal::is_same::value) - Dtype = SLU_S; - else if (internal::is_same::value) - Dtype = SLU_D; - else if (internal::is_same >::value) - Dtype = SLU_C; - else if (internal::is_same >::value) - Dtype = SLU_Z; - else - { - eigen_assert(false && "Scalar type not supported by SuperLU"); - } - } - - template - static SluMatrix Map(MatrixBase& _mat) - { - MatrixType& mat(_mat.derived()); - eigen_assert( ((MatrixType::Flags&RowMajorBit)!=RowMajorBit) && "row-major dense matrices are not supported by SuperLU"); - SluMatrix res; - res.setStorageType(SLU_DN); - res.setScalarType(); - res.Mtype = SLU_GE; - - res.nrow = mat.rows(); - res.ncol = mat.cols(); - - res.storage.lda = MatrixType::IsVectorAtCompileTime ? 
mat.size() : mat.outerStride(); - res.storage.values = (void*)(mat.data()); - return res; - } - - template - static SluMatrix Map(SparseMatrixBase& mat) - { - SluMatrix res; - if ((MatrixType::Flags&RowMajorBit)==RowMajorBit) - { - res.setStorageType(SLU_NR); - res.nrow = mat.cols(); - res.ncol = mat.rows(); - } - else - { - res.setStorageType(SLU_NC); - res.nrow = mat.rows(); - res.ncol = mat.cols(); - } - - res.Mtype = SLU_GE; - - res.storage.nnz = mat.nonZeros(); - res.storage.values = mat.derived().valuePtr(); - res.storage.innerInd = mat.derived().innerIndexPtr(); - res.storage.outerInd = mat.derived().outerIndexPtr(); - - res.setScalarType(); - - // FIXME the following is not very accurate - if (MatrixType::Flags & Upper) - res.Mtype = SLU_TRU; - if (MatrixType::Flags & Lower) - res.Mtype = SLU_TRL; - - eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU"); - - return res; - } -}; - -template -struct SluMatrixMapHelper > -{ - typedef Matrix MatrixType; - static void run(MatrixType& mat, SluMatrix& res) - { - eigen_assert( ((Options&RowMajor)!=RowMajor) && "row-major dense matrices is not supported by SuperLU"); - res.setStorageType(SLU_DN); - res.setScalarType(); - res.Mtype = SLU_GE; - - res.nrow = mat.rows(); - res.ncol = mat.cols(); - - res.storage.lda = mat.outerStride(); - res.storage.values = mat.data(); - } -}; - -template -struct SluMatrixMapHelper > -{ - typedef Derived MatrixType; - static void run(MatrixType& mat, SluMatrix& res) - { - if ((MatrixType::Flags&RowMajorBit)==RowMajorBit) - { - res.setStorageType(SLU_NR); - res.nrow = mat.cols(); - res.ncol = mat.rows(); - } - else - { - res.setStorageType(SLU_NC); - res.nrow = mat.rows(); - res.ncol = mat.cols(); - } - - res.Mtype = SLU_GE; - - res.storage.nnz = mat.nonZeros(); - res.storage.values = mat.valuePtr(); - res.storage.innerInd = mat.innerIndexPtr(); - res.storage.outerInd = mat.outerIndexPtr(); - - res.setScalarType(); - - // FIXME the 
following is not very accurate - if (MatrixType::Flags & Upper) - res.Mtype = SLU_TRU; - if (MatrixType::Flags & Lower) - res.Mtype = SLU_TRL; - - eigen_assert(((MatrixType::Flags & SelfAdjoint)==0) && "SelfAdjoint matrix shape not supported by SuperLU"); - } -}; - -namespace internal { - -template -SluMatrix asSluMatrix(MatrixType& mat) -{ - return SluMatrix::Map(mat); -} - -/** View a Super LU matrix as an Eigen expression */ -template -MappedSparseMatrix map_superlu(SluMatrix& sluMat) -{ - eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR - || (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC); - - Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow; - - return MappedSparseMatrix( - sluMat.nrow, sluMat.ncol, sluMat.storage.outerInd[outerSize], - sluMat.storage.outerInd, sluMat.storage.innerInd, reinterpret_cast(sluMat.storage.values) ); -} - -} // end namespace internal - -/** \ingroup SuperLUSupport_Module - * \class SuperLUBase - * \brief The base class for the direct and incomplete LU factorization of SuperLU - */ -template -class SuperLUBase : internal::noncopyable -{ - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef Matrix Vector; - typedef Matrix IntRowVectorType; - typedef Matrix IntColVectorType; - typedef SparseMatrix LUMatrixType; - - public: - - SuperLUBase() {} - - ~SuperLUBase() - { - clearFactors(); - } - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - /** \returns a reference to the Super LU option object to configure the Super LU algorithms. */ - inline superlu_options_t& options() { return m_sluOptions; } - - /** \brief Reports whether previous computation was successful. 
- * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. - */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - /** Computes the sparse Cholesky decomposition of \a matrix */ - void compute(const MatrixType& matrix) - { - derived().analyzePattern(matrix); - derived().factorize(matrix); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "SuperLU is not initialized."); - eigen_assert(rows()==b.rows() - && "SuperLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "SuperLU is not initialized."); - eigen_assert(rows()==b.rows() - && "SuperLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. 
- * - * \sa factorize() - */ - void analyzePattern(const MatrixType& /*matrix*/) - { - m_isInitialized = true; - m_info = Success; - m_analysisIsOk = true; - m_factorizationIsOk = false; - } - - template - void dumpMemory(Stream& /*s*/) - {} - - protected: - - void initFactorization(const MatrixType& a) - { - set_default_options(&this->m_sluOptions); - - const int size = a.rows(); - m_matrix = a; - - m_sluA = internal::asSluMatrix(m_matrix); - clearFactors(); - - m_p.resize(size); - m_q.resize(size); - m_sluRscale.resize(size); - m_sluCscale.resize(size); - m_sluEtree.resize(size); - - // set empty B and X - m_sluB.setStorageType(SLU_DN); - m_sluB.setScalarType(); - m_sluB.Mtype = SLU_GE; - m_sluB.storage.values = 0; - m_sluB.nrow = 0; - m_sluB.ncol = 0; - m_sluB.storage.lda = size; - m_sluX = m_sluB; - - m_extractedDataAreDirty = true; - } - - void init() - { - m_info = InvalidInput; - m_isInitialized = false; - m_sluL.Store = 0; - m_sluU.Store = 0; - } - - void extractData() const; - - void clearFactors() - { - if(m_sluL.Store) - Destroy_SuperNode_Matrix(&m_sluL); - if(m_sluU.Store) - Destroy_CompCol_Matrix(&m_sluU); - - m_sluL.Store = 0; - m_sluU.Store = 0; - - memset(&m_sluL,0,sizeof m_sluL); - memset(&m_sluU,0,sizeof m_sluU); - } - - // cached data to reduce reallocation, etc. 
- mutable LUMatrixType m_l; - mutable LUMatrixType m_u; - mutable IntColVectorType m_p; - mutable IntRowVectorType m_q; - - mutable LUMatrixType m_matrix; // copy of the factorized matrix - mutable SluMatrix m_sluA; - mutable SuperMatrix m_sluL, m_sluU; - mutable SluMatrix m_sluB, m_sluX; - mutable SuperLUStat_t m_sluStat; - mutable superlu_options_t m_sluOptions; - mutable std::vector m_sluEtree; - mutable Matrix m_sluRscale, m_sluCscale; - mutable Matrix m_sluFerr, m_sluBerr; - mutable char m_sluEqued; - - mutable ComputationInfo m_info; - bool m_isInitialized; - int m_factorizationIsOk; - int m_analysisIsOk; - mutable bool m_extractedDataAreDirty; - - private: - SuperLUBase(SuperLUBase& ) { } -}; - - -/** \ingroup SuperLUSupport_Module - * \class SuperLU - * \brief A sparse direct LU factorization and solver based on the SuperLU library - * - * This class allows to solve for A.X = B sparse linear problems via a direct LU factorization - * using the SuperLU library. The sparse matrix A must be squared and invertible. The vectors or matrices - * X and B can be either dense or sparse. 
- * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class SuperLU : public SuperLUBase<_MatrixType,SuperLU<_MatrixType> > -{ - public: - typedef SuperLUBase<_MatrixType,SuperLU> Base; - typedef _MatrixType MatrixType; - typedef typename Base::Scalar Scalar; - typedef typename Base::RealScalar RealScalar; - typedef typename Base::Index Index; - typedef typename Base::IntRowVectorType IntRowVectorType; - typedef typename Base::IntColVectorType IntColVectorType; - typedef typename Base::LUMatrixType LUMatrixType; - typedef TriangularView LMatrixType; - typedef TriangularView UMatrixType; - - public: - - SuperLU() : Base() { init(); } - - SuperLU(const MatrixType& matrix) : Base() - { - init(); - Base::compute(matrix); - } - - ~SuperLU() - { - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. - * - * \sa factorize() - */ - void analyzePattern(const MatrixType& matrix) - { - m_info = InvalidInput; - m_isInitialized = false; - Base::analyzePattern(matrix); - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. 
- * - * \sa analyzePattern() - */ - void factorize(const MatrixType& matrix); - - #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal */ - template - void _solve(const MatrixBase &b, MatrixBase &dest) const; - #endif // EIGEN_PARSED_BY_DOXYGEN - - inline const LMatrixType& matrixL() const - { - if (m_extractedDataAreDirty) this->extractData(); - return m_l; - } - - inline const UMatrixType& matrixU() const - { - if (m_extractedDataAreDirty) this->extractData(); - return m_u; - } - - inline const IntColVectorType& permutationP() const - { - if (m_extractedDataAreDirty) this->extractData(); - return m_p; - } - - inline const IntRowVectorType& permutationQ() const - { - if (m_extractedDataAreDirty) this->extractData(); - return m_q; - } - - Scalar determinant() const; - - protected: - - using Base::m_matrix; - using Base::m_sluOptions; - using Base::m_sluA; - using Base::m_sluB; - using Base::m_sluX; - using Base::m_p; - using Base::m_q; - using Base::m_sluEtree; - using Base::m_sluEqued; - using Base::m_sluRscale; - using Base::m_sluCscale; - using Base::m_sluL; - using Base::m_sluU; - using Base::m_sluStat; - using Base::m_sluFerr; - using Base::m_sluBerr; - using Base::m_l; - using Base::m_u; - - using Base::m_analysisIsOk; - using Base::m_factorizationIsOk; - using Base::m_extractedDataAreDirty; - using Base::m_isInitialized; - using Base::m_info; - - void init() - { - Base::init(); - - set_default_options(&this->m_sluOptions); - m_sluOptions.PrintStat = NO; - m_sluOptions.ConditionNumber = NO; - m_sluOptions.Trans = NOTRANS; - m_sluOptions.ColPerm = COLAMD; - } - - - private: - SuperLU(SuperLU& ) { } -}; - -template -void SuperLU::factorize(const MatrixType& a) -{ - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - if(!m_analysisIsOk) - { - m_info = InvalidInput; - return; - } - - this->initFactorization(a); - - m_sluOptions.ColPerm = COLAMD; - int info = 0; - RealScalar recip_pivot_growth, rcond; - RealScalar ferr, berr; - - 
StatInit(&m_sluStat); - SuperLU_gssvx(&m_sluOptions, &m_sluA, m_q.data(), m_p.data(), &m_sluEtree[0], - &m_sluEqued, &m_sluRscale[0], &m_sluCscale[0], - &m_sluL, &m_sluU, - NULL, 0, - &m_sluB, &m_sluX, - &recip_pivot_growth, &rcond, - &ferr, &berr, - &m_sluStat, &info, Scalar()); - StatFree(&m_sluStat); - - m_extractedDataAreDirty = true; - - // FIXME how to better check for errors ??? - m_info = info == 0 ? Success : NumericalIssue; - m_factorizationIsOk = true; -} - -template -template -void SuperLU::_solve(const MatrixBase &b, MatrixBase& x) const -{ - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()"); - - const int size = m_matrix.rows(); - const int rhsCols = b.cols(); - eigen_assert(size==b.rows()); - - m_sluOptions.Trans = NOTRANS; - m_sluOptions.Fact = FACTORED; - m_sluOptions.IterRefine = NOREFINE; - - - m_sluFerr.resize(rhsCols); - m_sluBerr.resize(rhsCols); - m_sluB = SluMatrix::Map(b.const_cast_derived()); - m_sluX = SluMatrix::Map(x.derived()); - - typename Rhs::PlainObject b_cpy; - if(m_sluEqued!='N') - { - b_cpy = b; - m_sluB = SluMatrix::Map(b_cpy.const_cast_derived()); - } - - StatInit(&m_sluStat); - int info = 0; - RealScalar recip_pivot_growth, rcond; - SuperLU_gssvx(&m_sluOptions, &m_sluA, - m_q.data(), m_p.data(), - &m_sluEtree[0], &m_sluEqued, - &m_sluRscale[0], &m_sluCscale[0], - &m_sluL, &m_sluU, - NULL, 0, - &m_sluB, &m_sluX, - &recip_pivot_growth, &rcond, - &m_sluFerr[0], &m_sluBerr[0], - &m_sluStat, &info, Scalar()); - StatFree(&m_sluStat); - m_info = info==0 ? Success : NumericalIssue; -} - -// the code of this extractData() function has been adapted from the SuperLU's Matlab support code, -// -// Copyright (c) 1994 by Xerox Corporation. All rights reserved. -// -// THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY -// EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 
-// -template -void SuperLUBase::extractData() const -{ - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for extracting factors, you must first call either compute() or analyzePattern()/factorize()"); - if (m_extractedDataAreDirty) - { - int upper; - int fsupc, istart, nsupr; - int lastl = 0, lastu = 0; - SCformat *Lstore = static_cast(m_sluL.Store); - NCformat *Ustore = static_cast(m_sluU.Store); - Scalar *SNptr; - - const int size = m_matrix.rows(); - m_l.resize(size,size); - m_l.resizeNonZeros(Lstore->nnz); - m_u.resize(size,size); - m_u.resizeNonZeros(Ustore->nnz); - - int* Lcol = m_l.outerIndexPtr(); - int* Lrow = m_l.innerIndexPtr(); - Scalar* Lval = m_l.valuePtr(); - - int* Ucol = m_u.outerIndexPtr(); - int* Urow = m_u.innerIndexPtr(); - Scalar* Uval = m_u.valuePtr(); - - Ucol[0] = 0; - Ucol[0] = 0; - - /* for each supernode */ - for (int k = 0; k <= Lstore->nsuper; ++k) - { - fsupc = L_FST_SUPC(k); - istart = L_SUB_START(fsupc); - nsupr = L_SUB_START(fsupc+1) - istart; - upper = 1; - - /* for each column in the supernode */ - for (int j = fsupc; j < L_FST_SUPC(k+1); ++j) - { - SNptr = &((Scalar*)Lstore->nzval)[L_NZ_START(j)]; - - /* Extract U */ - for (int i = U_NZ_START(j); i < U_NZ_START(j+1); ++i) - { - Uval[lastu] = ((Scalar*)Ustore->nzval)[i]; - /* Matlab doesn't like explicit zero. */ - if (Uval[lastu] != 0.0) - Urow[lastu++] = U_SUB(i); - } - for (int i = 0; i < upper; ++i) - { - /* upper triangle in the supernode */ - Uval[lastu] = SNptr[i]; - /* Matlab doesn't like explicit zero. */ - if (Uval[lastu] != 0.0) - Urow[lastu++] = L_SUB(istart+i); - } - Ucol[j+1] = lastu; - - /* Extract L */ - Lval[lastl] = 1.0; /* unit diagonal */ - Lrow[lastl++] = L_SUB(istart + upper - 1); - for (int i = upper; i < nsupr; ++i) - { - Lval[lastl] = SNptr[i]; - /* Matlab doesn't like explicit zero. */ - if (Lval[lastl] != 0.0) - Lrow[lastl++] = L_SUB(istart+i); - } - Lcol[j+1] = lastl; - - ++upper; - } /* for j ... */ - - } /* for k ... 
*/ - - // squeeze the matrices : - m_l.resizeNonZeros(lastl); - m_u.resizeNonZeros(lastu); - - m_extractedDataAreDirty = false; - } -} - -template -typename SuperLU::Scalar SuperLU::determinant() const -{ - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for computing the determinant, you must first call either compute() or analyzePattern()/factorize()"); - - if (m_extractedDataAreDirty) - this->extractData(); - - Scalar det = Scalar(1); - for (int j=0; j 0) - { - int lastId = m_u.outerIndexPtr()[j+1]-1; - eigen_assert(m_u.innerIndexPtr()[lastId]<=j); - if (m_u.innerIndexPtr()[lastId]==j) - det *= m_u.valuePtr()[lastId]; - } - } - if(m_sluEqued!='N') - return det/m_sluRscale.prod()/m_sluCscale.prod(); - else - return det; -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN -#define EIGEN_SUPERLU_HAS_ILU -#endif - -#ifdef EIGEN_SUPERLU_HAS_ILU - -/** \ingroup SuperLUSupport_Module - * \class SuperILU - * \brief A sparse direct \b incomplete LU factorization and solver based on the SuperLU library - * - * This class allows to solve for an approximate solution of A.X = B sparse linear problems via an incomplete LU factorization - * using the SuperLU library. This class is aimed to be used as a preconditioner of the iterative linear solvers. - * - * \warning This class requires SuperLU 4 or later. 
- * - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * - * \sa \ref TutorialSparseDirectSolvers, class ConjugateGradient, class BiCGSTAB - */ - -template -class SuperILU : public SuperLUBase<_MatrixType,SuperILU<_MatrixType> > -{ - public: - typedef SuperLUBase<_MatrixType,SuperILU> Base; - typedef _MatrixType MatrixType; - typedef typename Base::Scalar Scalar; - typedef typename Base::RealScalar RealScalar; - typedef typename Base::Index Index; - - public: - - SuperILU() : Base() { init(); } - - SuperILU(const MatrixType& matrix) : Base() - { - init(); - Base::compute(matrix); - } - - ~SuperILU() - { - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. - * - * \sa factorize() - */ - void analyzePattern(const MatrixType& matrix) - { - Base::analyzePattern(matrix); - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the symbolic decomposition has been performed. 
- * - * \sa analyzePattern() - */ - void factorize(const MatrixType& matrix); - - #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal */ - template - void _solve(const MatrixBase &b, MatrixBase &dest) const; - #endif // EIGEN_PARSED_BY_DOXYGEN - - protected: - - using Base::m_matrix; - using Base::m_sluOptions; - using Base::m_sluA; - using Base::m_sluB; - using Base::m_sluX; - using Base::m_p; - using Base::m_q; - using Base::m_sluEtree; - using Base::m_sluEqued; - using Base::m_sluRscale; - using Base::m_sluCscale; - using Base::m_sluL; - using Base::m_sluU; - using Base::m_sluStat; - using Base::m_sluFerr; - using Base::m_sluBerr; - using Base::m_l; - using Base::m_u; - - using Base::m_analysisIsOk; - using Base::m_factorizationIsOk; - using Base::m_extractedDataAreDirty; - using Base::m_isInitialized; - using Base::m_info; - - void init() - { - Base::init(); - - ilu_set_default_options(&m_sluOptions); - m_sluOptions.PrintStat = NO; - m_sluOptions.ConditionNumber = NO; - m_sluOptions.Trans = NOTRANS; - m_sluOptions.ColPerm = MMD_AT_PLUS_A; - - // no attempt to preserve column sum - m_sluOptions.ILU_MILU = SILU; - // only basic ILU(k) support -- no direct control over memory consumption - // better to use ILU_DropRule = DROP_BASIC | DROP_AREA - // and set ILU_FillFactor to max memory growth - m_sluOptions.ILU_DropRule = DROP_BASIC; - m_sluOptions.ILU_DropTol = NumTraits::dummy_precision()*10; - } - - private: - SuperILU(SuperILU& ) { } -}; - -template -void SuperILU::factorize(const MatrixType& a) -{ - eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); - if(!m_analysisIsOk) - { - m_info = InvalidInput; - return; - } - - this->initFactorization(a); - - int info = 0; - RealScalar recip_pivot_growth, rcond; - - StatInit(&m_sluStat); - SuperLU_gsisx(&m_sluOptions, &m_sluA, m_q.data(), m_p.data(), &m_sluEtree[0], - &m_sluEqued, &m_sluRscale[0], &m_sluCscale[0], - &m_sluL, &m_sluU, - NULL, 0, - &m_sluB, &m_sluX, - &recip_pivot_growth, &rcond, - 
&m_sluStat, &info, Scalar()); - StatFree(&m_sluStat); - - // FIXME how to better check for errors ??? - m_info = info == 0 ? Success : NumericalIssue; - m_factorizationIsOk = true; -} - -template -template -void SuperILU::_solve(const MatrixBase &b, MatrixBase& x) const -{ - eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or analyzePattern()/factorize()"); - - const int size = m_matrix.rows(); - const int rhsCols = b.cols(); - eigen_assert(size==b.rows()); - - m_sluOptions.Trans = NOTRANS; - m_sluOptions.Fact = FACTORED; - m_sluOptions.IterRefine = NOREFINE; - - m_sluFerr.resize(rhsCols); - m_sluBerr.resize(rhsCols); - m_sluB = SluMatrix::Map(b.const_cast_derived()); - m_sluX = SluMatrix::Map(x.derived()); - - typename Rhs::PlainObject b_cpy; - if(m_sluEqued!='N') - { - b_cpy = b; - m_sluB = SluMatrix::Map(b_cpy.const_cast_derived()); - } - - int info = 0; - RealScalar recip_pivot_growth, rcond; - - StatInit(&m_sluStat); - SuperLU_gsisx(&m_sluOptions, &m_sluA, - m_q.data(), m_p.data(), - &m_sluEtree[0], &m_sluEqued, - &m_sluRscale[0], &m_sluCscale[0], - &m_sluL, &m_sluU, - NULL, 0, - &m_sluB, &m_sluX, - &recip_pivot_growth, &rcond, - &m_sluStat, &info, Scalar()); - StatFree(&m_sluStat); - - m_info = info==0 ? 
Success : NumericalIssue; -} -#endif - -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef SuperLUBase<_MatrixType,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec().derived()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef SuperLUBase<_MatrixType,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_SUPERLUSUPPORT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/CMakeLists.txt index a57de0020..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_UmfPackSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_UmfPackSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/UmfPackSupport COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/UmfPackSupport.h b/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/UmfPackSupport.h index 54b1799ea..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -1,475 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2011 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_UMFPACKSUPPORT_H -#define EIGEN_UMFPACKSUPPORT_H - -namespace Eigen { - -/* TODO extract L, extract U, compute det, etc... 
*/ - -// generic double/complex wrapper functions: - -inline void umfpack_free_numeric(void **Numeric, double) -{ umfpack_di_free_numeric(Numeric); *Numeric = 0; } - -inline void umfpack_free_numeric(void **Numeric, std::complex) -{ umfpack_zi_free_numeric(Numeric); *Numeric = 0; } - -inline void umfpack_free_symbolic(void **Symbolic, double) -{ umfpack_di_free_symbolic(Symbolic); *Symbolic = 0; } - -inline void umfpack_free_symbolic(void **Symbolic, std::complex) -{ umfpack_zi_free_symbolic(Symbolic); *Symbolic = 0; } - -inline int umfpack_symbolic(int n_row,int n_col, - const int Ap[], const int Ai[], const double Ax[], void **Symbolic, - const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO]) -{ - return umfpack_di_symbolic(n_row,n_col,Ap,Ai,Ax,Symbolic,Control,Info); -} - -inline int umfpack_symbolic(int n_row,int n_col, - const int Ap[], const int Ai[], const std::complex Ax[], void **Symbolic, - const double Control [UMFPACK_CONTROL], double Info [UMFPACK_INFO]) -{ - return umfpack_zi_symbolic(n_row,n_col,Ap,Ai,&numext::real_ref(Ax[0]),0,Symbolic,Control,Info); -} - -inline int umfpack_numeric( const int Ap[], const int Ai[], const double Ax[], - void *Symbolic, void **Numeric, - const double Control[UMFPACK_CONTROL],double Info [UMFPACK_INFO]) -{ - return umfpack_di_numeric(Ap,Ai,Ax,Symbolic,Numeric,Control,Info); -} - -inline int umfpack_numeric( const int Ap[], const int Ai[], const std::complex Ax[], - void *Symbolic, void **Numeric, - const double Control[UMFPACK_CONTROL],double Info [UMFPACK_INFO]) -{ - return umfpack_zi_numeric(Ap,Ai,&numext::real_ref(Ax[0]),0,Symbolic,Numeric,Control,Info); -} - -inline int umfpack_solve( int sys, const int Ap[], const int Ai[], const double Ax[], - double X[], const double B[], void *Numeric, - const double Control[UMFPACK_CONTROL], double Info[UMFPACK_INFO]) -{ - return umfpack_di_solve(sys,Ap,Ai,Ax,X,B,Numeric,Control,Info); -} - -inline int umfpack_solve( int sys, const int Ap[], const int Ai[], const 
std::complex Ax[], - std::complex X[], const std::complex B[], void *Numeric, - const double Control[UMFPACK_CONTROL], double Info[UMFPACK_INFO]) -{ - return umfpack_zi_solve(sys,Ap,Ai,&numext::real_ref(Ax[0]),0,&numext::real_ref(X[0]),0,&numext::real_ref(B[0]),0,Numeric,Control,Info); -} - -inline int umfpack_get_lunz(int *lnz, int *unz, int *n_row, int *n_col, int *nz_udiag, void *Numeric, double) -{ - return umfpack_di_get_lunz(lnz,unz,n_row,n_col,nz_udiag,Numeric); -} - -inline int umfpack_get_lunz(int *lnz, int *unz, int *n_row, int *n_col, int *nz_udiag, void *Numeric, std::complex) -{ - return umfpack_zi_get_lunz(lnz,unz,n_row,n_col,nz_udiag,Numeric); -} - -inline int umfpack_get_numeric(int Lp[], int Lj[], double Lx[], int Up[], int Ui[], double Ux[], - int P[], int Q[], double Dx[], int *do_recip, double Rs[], void *Numeric) -{ - return umfpack_di_get_numeric(Lp,Lj,Lx,Up,Ui,Ux,P,Q,Dx,do_recip,Rs,Numeric); -} - -inline int umfpack_get_numeric(int Lp[], int Lj[], std::complex Lx[], int Up[], int Ui[], std::complex Ux[], - int P[], int Q[], std::complex Dx[], int *do_recip, double Rs[], void *Numeric) -{ - double& lx0_real = numext::real_ref(Lx[0]); - double& ux0_real = numext::real_ref(Ux[0]); - double& dx0_real = numext::real_ref(Dx[0]); - return umfpack_zi_get_numeric(Lp,Lj,Lx?&lx0_real:0,0,Up,Ui,Ux?&ux0_real:0,0,P,Q, - Dx?&dx0_real:0,0,do_recip,Rs,Numeric); -} - -inline int umfpack_get_determinant(double *Mx, double *Ex, void *NumericHandle, double User_Info [UMFPACK_INFO]) -{ - return umfpack_di_get_determinant(Mx,Ex,NumericHandle,User_Info); -} - -inline int umfpack_get_determinant(std::complex *Mx, double *Ex, void *NumericHandle, double User_Info [UMFPACK_INFO]) -{ - double& mx_real = numext::real_ref(*Mx); - return umfpack_zi_get_determinant(&mx_real,0,Ex,NumericHandle,User_Info); -} - -namespace internal { - template struct umfpack_helper_is_sparse_plain : false_type {}; - template - struct umfpack_helper_is_sparse_plain > - : true_type {}; - 
template - struct umfpack_helper_is_sparse_plain > - : true_type {}; -} - -/** \ingroup UmfPackSupport_Module - * \brief A sparse LU factorization and solver based on UmfPack - * - * This class allows to solve for A.X = B sparse linear problems via a LU factorization - * using the UmfPack library. The sparse matrix A must be squared and full rank. - * The vectors or matrices X and B can be either dense or sparse. - * - * \warning The input matrix A should be in a \b compressed and \b column-major form. - * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. - * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> - * - * \sa \ref TutorialSparseDirectSolvers - */ -template -class UmfPackLU : internal::noncopyable -{ - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - typedef Matrix Vector; - typedef Matrix IntRowVectorType; - typedef Matrix IntColVectorType; - typedef SparseMatrix LUMatrixType; - typedef SparseMatrix UmfpackMatrixType; - - public: - - UmfPackLU() { init(); } - - template - UmfPackLU(const InputMatrixType& matrix) - { - init(); - compute(matrix); - } - - ~UmfPackLU() - { - if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); - if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - } - - inline Index rows() const { return m_copyMatrix.rows(); } - inline Index cols() const { return m_copyMatrix.cols(); } - - /** \brief Reports whether previous computation was successful. - * - * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. 
- */ - ComputationInfo info() const - { - eigen_assert(m_isInitialized && "Decomposition is not initialized."); - return m_info; - } - - inline const LUMatrixType& matrixL() const - { - if (m_extractedDataAreDirty) extractData(); - return m_l; - } - - inline const LUMatrixType& matrixU() const - { - if (m_extractedDataAreDirty) extractData(); - return m_u; - } - - inline const IntColVectorType& permutationP() const - { - if (m_extractedDataAreDirty) extractData(); - return m_p; - } - - inline const IntRowVectorType& permutationQ() const - { - if (m_extractedDataAreDirty) extractData(); - return m_q; - } - - /** Computes the sparse Cholesky decomposition of \a matrix - * Note that the matrix should be column-major, and in compressed format for best performance. - * \sa SparseMatrix::makeCompressed(). - */ - template - void compute(const InputMatrixType& matrix) - { - if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); - if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - grapInput(matrix.derived()); - analyzePattern_impl(); - factorize_impl(); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "UmfPackLU is not initialized."); - eigen_assert(rows()==b.rows() - && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
- * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "UmfPackLU is not initialized."); - eigen_assert(rows()==b.rows() - && "UmfPackLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - - /** Performs a symbolic decomposition on the sparcity of \a matrix. - * - * This function is particularly useful when solving for several problems having the same structure. - * - * \sa factorize(), compute() - */ - template - void analyzePattern(const InputMatrixType& matrix) - { - if(m_symbolic) umfpack_free_symbolic(&m_symbolic,Scalar()); - if(m_numeric) umfpack_free_numeric(&m_numeric,Scalar()); - - grapInput(matrix.derived()); - - analyzePattern_impl(); - } - - /** Performs a numeric decomposition of \a matrix - * - * The given matrix must has the same sparcity than the matrix on which the pattern anylysis has been performed. 
- * - * \sa analyzePattern(), compute() - */ - template - void factorize(const InputMatrixType& matrix) - { - eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); - if(m_numeric) - umfpack_free_numeric(&m_numeric,Scalar()); - - grapInput(matrix.derived()); - - factorize_impl(); - } - - #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal */ - template - bool _solve(const MatrixBase &b, MatrixBase &x) const; - #endif - - Scalar determinant() const; - - void extractData() const; - - protected: - - void init() - { - m_info = InvalidInput; - m_isInitialized = false; - m_numeric = 0; - m_symbolic = 0; - m_outerIndexPtr = 0; - m_innerIndexPtr = 0; - m_valuePtr = 0; - m_extractedDataAreDirty = true; - } - - template - void grapInput_impl(const InputMatrixType& mat, internal::true_type) - { - m_copyMatrix.resize(mat.rows(), mat.cols()); - if( ((MatrixType::Flags&RowMajorBit)==RowMajorBit) || sizeof(typename MatrixType::Index)!=sizeof(int) || !mat.isCompressed() ) - { - // non supported input -> copy - m_copyMatrix = mat; - m_outerIndexPtr = m_copyMatrix.outerIndexPtr(); - m_innerIndexPtr = m_copyMatrix.innerIndexPtr(); - m_valuePtr = m_copyMatrix.valuePtr(); - } - else - { - m_outerIndexPtr = mat.outerIndexPtr(); - m_innerIndexPtr = mat.innerIndexPtr(); - m_valuePtr = mat.valuePtr(); - } - } - - template - void grapInput_impl(const InputMatrixType& mat, internal::false_type) - { - m_copyMatrix = mat; - m_outerIndexPtr = m_copyMatrix.outerIndexPtr(); - m_innerIndexPtr = m_copyMatrix.innerIndexPtr(); - m_valuePtr = m_copyMatrix.valuePtr(); - } - - template - void grapInput(const InputMatrixType& mat) - { - grapInput_impl(mat, internal::umfpack_helper_is_sparse_plain()); - } - - void analyzePattern_impl() - { - int errorCode = 0; - errorCode = umfpack_symbolic(m_copyMatrix.rows(), m_copyMatrix.cols(), m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, - &m_symbolic, 0, 0); - - m_isInitialized = true; - m_info = errorCode ? 
InvalidInput : Success; - m_analysisIsOk = true; - m_factorizationIsOk = false; - m_extractedDataAreDirty = true; - } - - void factorize_impl() - { - int errorCode; - errorCode = umfpack_numeric(m_outerIndexPtr, m_innerIndexPtr, m_valuePtr, - m_symbolic, &m_numeric, 0, 0); - - m_info = errorCode ? NumericalIssue : Success; - m_factorizationIsOk = true; - m_extractedDataAreDirty = true; - } - - // cached data to reduce reallocation, etc. - mutable LUMatrixType m_l; - mutable LUMatrixType m_u; - mutable IntColVectorType m_p; - mutable IntRowVectorType m_q; - - UmfpackMatrixType m_copyMatrix; - const Scalar* m_valuePtr; - const int* m_outerIndexPtr; - const int* m_innerIndexPtr; - void* m_numeric; - void* m_symbolic; - - mutable ComputationInfo m_info; - bool m_isInitialized; - int m_factorizationIsOk; - int m_analysisIsOk; - mutable bool m_extractedDataAreDirty; - - private: - UmfPackLU(UmfPackLU& ) { } -}; - - -template -void UmfPackLU::extractData() const -{ - if (m_extractedDataAreDirty) - { - // get size of the data - int lnz, unz, rows, cols, nz_udiag; - umfpack_get_lunz(&lnz, &unz, &rows, &cols, &nz_udiag, m_numeric, Scalar()); - - // allocate data - m_l.resize(rows,(std::min)(rows,cols)); - m_l.resizeNonZeros(lnz); - - m_u.resize((std::min)(rows,cols),cols); - m_u.resizeNonZeros(unz); - - m_p.resize(rows); - m_q.resize(cols); - - // extract - umfpack_get_numeric(m_l.outerIndexPtr(), m_l.innerIndexPtr(), m_l.valuePtr(), - m_u.outerIndexPtr(), m_u.innerIndexPtr(), m_u.valuePtr(), - m_p.data(), m_q.data(), 0, 0, 0, m_numeric); - - m_extractedDataAreDirty = false; - } -} - -template -typename UmfPackLU::Scalar UmfPackLU::determinant() const -{ - Scalar det; - umfpack_get_determinant(&det, 0, m_numeric, 0); - return det; -} - -template -template -bool UmfPackLU::_solve(const MatrixBase &b, MatrixBase &x) const -{ - const int rhsCols = b.cols(); - eigen_assert((BDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major rhs yet"); - 
eigen_assert((XDerived::Flags&RowMajorBit)==0 && "UmfPackLU backend does not support non col-major result yet"); - eigen_assert(b.derived().data() != x.derived().data() && " Umfpack does not support inplace solve"); - - int errorCode; - for (int j=0; j -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef UmfPackLU<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef UmfPackLU<_MatrixType> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - this->defaultEvalTo(dst); - } -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_UMFPACKSUPPORT_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/misc/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/misc/CMakeLists.txt index a58ffb745..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/misc/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/misc/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_misc_SRCS "*.h") - -INSTALL(FILES - ${Eigen_misc_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/misc COMPONENT Devel - ) diff --git a/thirdparty/eigen-3.2.10/Eigen/src/misc/SparseSolve.h b/thirdparty/eigen-3.2.10/Eigen/src/misc/SparseSolve.h index 244bb8ec7..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/misc/SparseSolve.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/misc/SparseSolve.h @@ -1,128 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- -#ifndef EIGEN_SPARSE_SOLVE_H -#define EIGEN_SPARSE_SOLVE_H - -namespace Eigen { - -namespace internal { - -template struct sparse_solve_retval_base; -template struct sparse_solve_retval; - -template -struct traits > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef SparseMatrix ReturnType; -}; - -template struct sparse_solve_retval_base - : public ReturnByValue > -{ - typedef typename remove_all::type RhsNestedCleaned; - typedef _DecompositionType DecompositionType; - typedef ReturnByValue Base; - typedef typename Base::Index Index; - - sparse_solve_retval_base(const DecompositionType& dec, const Rhs& rhs) - : m_dec(dec), m_rhs(rhs) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - inline const DecompositionType& dec() const { return m_dec; } - inline const RhsNestedCleaned& rhs() const { return m_rhs; } - - template inline void evalTo(Dest& dst) const - { - static_cast*>(this)->evalTo(dst); - } - - protected: - template - inline void defaultEvalTo(SparseMatrix& dst) const - { - // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix. 
- static const int NbColsAtOnce = 4; - int rhsCols = m_rhs.cols(); - int size = m_rhs.rows(); - Eigen::Matrix tmp(size,rhsCols); - Eigen::Matrix tmpX(size,rhsCols); - for(int k=0; k(rhsCols-k, NbColsAtOnce); - tmp.leftCols(actualCols) = m_rhs.middleCols(k,actualCols); - tmpX.leftCols(actualCols) = m_dec.solve(tmp.leftCols(actualCols)); - dst.middleCols(k,actualCols) = tmpX.leftCols(actualCols).sparseView(); - } - } - const DecompositionType& m_dec; - typename Rhs::Nested m_rhs; -}; - -#define EIGEN_MAKE_SPARSE_SOLVE_HELPERS(DecompositionType,Rhs) \ - typedef typename DecompositionType::MatrixType MatrixType; \ - typedef typename MatrixType::Scalar Scalar; \ - typedef typename MatrixType::RealScalar RealScalar; \ - typedef typename MatrixType::Index Index; \ - typedef Eigen::internal::sparse_solve_retval_base Base; \ - using Base::dec; \ - using Base::rhs; \ - using Base::rows; \ - using Base::cols; \ - sparse_solve_retval(const DecompositionType& dec, const Rhs& rhs) \ - : Base(dec, rhs) {} - - - -template struct solve_retval_with_guess; - -template -struct traits > -{ - typedef typename DecompositionType::MatrixType MatrixType; - typedef Matrix ReturnType; -}; - -template struct solve_retval_with_guess - : public ReturnByValue > -{ - typedef typename DecompositionType::Index Index; - - solve_retval_with_guess(const DecompositionType& dec, const Rhs& rhs, const Guess& guess) - : m_dec(dec), m_rhs(rhs), m_guess(guess) - {} - - inline Index rows() const { return m_dec.cols(); } - inline Index cols() const { return m_rhs.cols(); } - - template inline void evalTo(Dest& dst) const - { - dst = m_guess; - m_dec._solveWithGuess(m_rhs,dst); - } - - protected: - const DecompositionType& m_dec; - const typename Rhs::Nested m_rhs; - const typename Guess::Nested m_guess; -}; - -} // namepsace internal - -} // end namespace Eigen - -#endif // EIGEN_SPARSE_SOLVE_H diff --git a/thirdparty/eigen-3.2.10/Eigen/src/misc/blas.h b/thirdparty/eigen-3.2.10/Eigen/src/misc/blas.h index 
6fce99ed5..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/misc/blas.h +++ b/thirdparty/eigen-3.2.10/Eigen/src/misc/blas.h @@ -1,658 +0,0 @@ -#ifndef BLAS_H -#define BLAS_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define BLASFUNC(FUNC) FUNC##_ - -#ifdef __WIN64__ -typedef long long BLASLONG; -typedef unsigned long long BLASULONG; -#else -typedef long BLASLONG; -typedef unsigned long BLASULONG; -#endif - -int BLASFUNC(xerbla)(const char *, int *info, int); - -float BLASFUNC(sdot) (int *, float *, int *, float *, int *); -float BLASFUNC(sdsdot)(int *, float *, float *, int *, float *, int *); - -double BLASFUNC(dsdot) (int *, float *, int *, float *, int *); -double BLASFUNC(ddot) (int *, double *, int *, double *, int *); -double BLASFUNC(qdot) (int *, double *, int *, double *, int *); - -int BLASFUNC(cdotuw) (int *, float *, int *, float *, int *, float*); -int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*); -int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*); -int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*); - -int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *); -int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *); -int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *); -int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *); -int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *); -int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *); - -int BLASFUNC(scopy) (int *, float *, int *, float *, int *); -int BLASFUNC(dcopy) (int *, double *, int *, double *, int *); -int BLASFUNC(qcopy) (int *, double *, int *, double *, int *); -int BLASFUNC(ccopy) (int *, float *, int 
*, float *, int *); -int BLASFUNC(zcopy) (int *, double *, int *, double *, int *); -int BLASFUNC(xcopy) (int *, double *, int *, double *, int *); - -int BLASFUNC(sswap) (int *, float *, int *, float *, int *); -int BLASFUNC(dswap) (int *, double *, int *, double *, int *); -int BLASFUNC(qswap) (int *, double *, int *, double *, int *); -int BLASFUNC(cswap) (int *, float *, int *, float *, int *); -int BLASFUNC(zswap) (int *, double *, int *, double *, int *); -int BLASFUNC(xswap) (int *, double *, int *, double *, int *); - -float BLASFUNC(sasum) (int *, float *, int *); -float BLASFUNC(scasum)(int *, float *, int *); -double BLASFUNC(dasum) (int *, double *, int *); -double BLASFUNC(qasum) (int *, double *, int *); -double BLASFUNC(dzasum)(int *, double *, int *); -double BLASFUNC(qxasum)(int *, double *, int *); - -int BLASFUNC(isamax)(int *, float *, int *); -int BLASFUNC(idamax)(int *, double *, int *); -int BLASFUNC(iqamax)(int *, double *, int *); -int BLASFUNC(icamax)(int *, float *, int *); -int BLASFUNC(izamax)(int *, double *, int *); -int BLASFUNC(ixamax)(int *, double *, int *); - -int BLASFUNC(ismax) (int *, float *, int *); -int BLASFUNC(idmax) (int *, double *, int *); -int BLASFUNC(iqmax) (int *, double *, int *); -int BLASFUNC(icmax) (int *, float *, int *); -int BLASFUNC(izmax) (int *, double *, int *); -int BLASFUNC(ixmax) (int *, double *, int *); - -int BLASFUNC(isamin)(int *, float *, int *); -int BLASFUNC(idamin)(int *, double *, int *); -int BLASFUNC(iqamin)(int *, double *, int *); -int BLASFUNC(icamin)(int *, float *, int *); -int BLASFUNC(izamin)(int *, double *, int *); -int BLASFUNC(ixamin)(int *, double *, int *); - -int BLASFUNC(ismin)(int *, float *, int *); -int BLASFUNC(idmin)(int *, double *, int *); -int BLASFUNC(iqmin)(int *, double *, int *); -int BLASFUNC(icmin)(int *, float *, int *); -int BLASFUNC(izmin)(int *, double *, int *); -int BLASFUNC(ixmin)(int *, double *, int *); - -float BLASFUNC(samax) (int *, float *, int *); 
-double BLASFUNC(damax) (int *, double *, int *); -double BLASFUNC(qamax) (int *, double *, int *); -float BLASFUNC(scamax)(int *, float *, int *); -double BLASFUNC(dzamax)(int *, double *, int *); -double BLASFUNC(qxamax)(int *, double *, int *); - -float BLASFUNC(samin) (int *, float *, int *); -double BLASFUNC(damin) (int *, double *, int *); -double BLASFUNC(qamin) (int *, double *, int *); -float BLASFUNC(scamin)(int *, float *, int *); -double BLASFUNC(dzamin)(int *, double *, int *); -double BLASFUNC(qxamin)(int *, double *, int *); - -float BLASFUNC(smax) (int *, float *, int *); -double BLASFUNC(dmax) (int *, double *, int *); -double BLASFUNC(qmax) (int *, double *, int *); -float BLASFUNC(scmax) (int *, float *, int *); -double BLASFUNC(dzmax) (int *, double *, int *); -double BLASFUNC(qxmax) (int *, double *, int *); - -float BLASFUNC(smin) (int *, float *, int *); -double BLASFUNC(dmin) (int *, double *, int *); -double BLASFUNC(qmin) (int *, double *, int *); -float BLASFUNC(scmin) (int *, float *, int *); -double BLASFUNC(dzmin) (int *, double *, int *); -double BLASFUNC(qxmin) (int *, double *, int *); - -int BLASFUNC(sscal) (int *, float *, float *, int *); -int BLASFUNC(dscal) (int *, double *, double *, int *); -int BLASFUNC(qscal) (int *, double *, double *, int *); -int BLASFUNC(cscal) (int *, float *, float *, int *); -int BLASFUNC(zscal) (int *, double *, double *, int *); -int BLASFUNC(xscal) (int *, double *, double *, int *); -int BLASFUNC(csscal)(int *, float *, float *, int *); -int BLASFUNC(zdscal)(int *, double *, double *, int *); -int BLASFUNC(xqscal)(int *, double *, double *, int *); - -float BLASFUNC(snrm2) (int *, float *, int *); -float BLASFUNC(scnrm2)(int *, float *, int *); - -double BLASFUNC(dnrm2) (int *, double *, int *); -double BLASFUNC(qnrm2) (int *, double *, int *); -double BLASFUNC(dznrm2)(int *, double *, int *); -double BLASFUNC(qxnrm2)(int *, double *, int *); - -int BLASFUNC(srot) (int *, float *, int *, float *, 
int *, float *, float *); -int BLASFUNC(drot) (int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(qrot) (int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(csrot) (int *, float *, int *, float *, int *, float *, float *); -int BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *); -int BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *); - -int BLASFUNC(srotg) (float *, float *, float *, float *); -int BLASFUNC(drotg) (double *, double *, double *, double *); -int BLASFUNC(qrotg) (double *, double *, double *, double *); -int BLASFUNC(crotg) (float *, float *, float *, float *); -int BLASFUNC(zrotg) (double *, double *, double *, double *); -int BLASFUNC(xrotg) (double *, double *, double *, double *); - -int BLASFUNC(srotmg)(float *, float *, float *, float *, float *); -int BLASFUNC(drotmg)(double *, double *, double *, double *, double *); - -int BLASFUNC(srotm) (int *, float *, int *, float *, int *, float *); -int BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *); -int BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *); - -/* Level 2 routines */ - -int BLASFUNC(sger)(int *, int *, float *, float *, int *, - float *, int *, float *, int *); -int BLASFUNC(dger)(int *, int *, double *, double *, int *, - double *, int *, double *, int *); -int BLASFUNC(qger)(int *, int *, double *, double *, int *, - double *, int *, double *, int *); -int BLASFUNC(cgeru)(int *, int *, float *, float *, int *, - float *, int *, float *, int *); -int BLASFUNC(cgerc)(int *, int *, float *, float *, int *, - float *, int *, float *, int *); -int BLASFUNC(zgeru)(int *, int *, double *, double *, int *, - double *, int *, double *, int *); -int BLASFUNC(zgerc)(int *, int *, double *, double *, int *, - double *, int *, double *, int *); -int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, - double *, int *, double *, int *); -int 
BLASFUNC(xgerc)(int *, int *, double *, double *, int *, - double *, int *, double *, int *); - -int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *, - double *, int *); - -int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *); - -int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int 
BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *, - float *, int *); -int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); -int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *, - double *, int *); - -int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(ctpmv) (char *, char *, char *, int *, float *, float *, int *); -int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *); -int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *); - -int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); - -int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); -int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); -int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); - -int BLASFUNC(ssymv) 
(char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csymv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(sspmv) (char *, int *, float *, float *, - float *, int *, float *, float *, int *); -int BLASFUNC(dspmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); -int BLASFUNC(qspmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); -int BLASFUNC(cspmv) (char *, int *, float *, float *, - float *, int *, float *, float *, int *); -int BLASFUNC(zspmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); -int BLASFUNC(xspmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); - -int BLASFUNC(ssyr) (char *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(dsyr) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(qsyr) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(csyr) (char *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(zsyr) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(xsyr) (char *, int *, double *, double *, int *, - double *, int *); - -int BLASFUNC(ssyr2) (char *, int *, float *, - float *, int *, float *, int *, float *, int *); -int BLASFUNC(dsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); -int BLASFUNC(qsyr2) (char *, int *, double *, - 
double *, int *, double *, int *, double *, int *); -int BLASFUNC(csyr2) (char *, int *, float *, - float *, int *, float *, int *, float *, int *); -int BLASFUNC(zsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); -int BLASFUNC(xsyr2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); - -int BLASFUNC(sspr) (char *, int *, float *, float *, int *, - float *); -int BLASFUNC(dspr) (char *, int *, double *, double *, int *, - double *); -int BLASFUNC(qspr) (char *, int *, double *, double *, int *, - double *); -int BLASFUNC(cspr) (char *, int *, float *, float *, int *, - float *); -int BLASFUNC(zspr) (char *, int *, double *, double *, int *, - double *); -int BLASFUNC(xspr) (char *, int *, double *, double *, int *, - double *); - -int BLASFUNC(sspr2) (char *, int *, float *, - float *, int *, float *, int *, float *); -int BLASFUNC(dspr2) (char *, int *, double *, - double *, int *, double *, int *, double *); -int BLASFUNC(qspr2) (char *, int *, double *, - double *, int *, double *, int *, double *); -int BLASFUNC(cspr2) (char *, int *, float *, - float *, int *, float *, int *, float *); -int BLASFUNC(zspr2) (char *, int *, double *, - double *, int *, double *, int *, double *); -int BLASFUNC(xspr2) (char *, int *, double *, - double *, int *, double *, int *, double *); - -int BLASFUNC(cher) (char *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(zher) (char *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(xher) (char *, int *, double *, double *, int *, - double *, int *); - -int BLASFUNC(chpr) (char *, int *, float *, float *, int *, float *); -int BLASFUNC(zhpr) (char *, int *, double *, double *, int *, double *); -int BLASFUNC(xhpr) (char *, int *, double *, double *, int *, double *); - -int BLASFUNC(cher2) (char *, int *, float *, - float *, int *, float *, int *, float *, int *); -int BLASFUNC(zher2) (char *, int *, double *, - double *, int *, 
double *, int *, double *, int *); -int BLASFUNC(xher2) (char *, int *, double *, - double *, int *, double *, int *, double *, int *); - -int BLASFUNC(chpr2) (char *, int *, float *, - float *, int *, float *, int *, float *); -int BLASFUNC(zhpr2) (char *, int *, double *, - double *, int *, double *, int *, double *); -int BLASFUNC(xhpr2) (char *, int *, double *, - double *, int *, double *, int *, double *); - -int BLASFUNC(chemv) (char *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemv) (char *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(chpmv) (char *, int *, float *, float *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhpmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhpmv) (char *, int *, double *, double *, - double *, int *, double *, double *, int *); - -int BLASFUNC(snorm)(char *, int *, int *, float *, int *); -int BLASFUNC(dnorm)(char *, int *, int *, double *, int *); -int BLASFUNC(cnorm)(char *, int *, int *, float *, int *); -int BLASFUNC(znorm)(char *, int *, int *, double *, int *); - -int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xgbmv)(char *, int *, int *, int *, int 
*, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -/* Level 3 routines */ - -int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *, - float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *, - float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); - -int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, 
- float *, int *, float *, int *, float *, float *, int *); -int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); -int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, - double *, int *, double *, int *, double *, double *, int *); - -int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *, - float *, float *, int *); -int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *, - double *, double *, int *); -int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *, - float *, float *, int *); -int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *, - double *, double *, int *); - -int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); - -int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); -int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *, - float *, float *, int *, float *, int *); 
-int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); -int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *, - double *, double *, int *, double *, int *); - -int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); - -int 
BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); - -int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); -int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, - double *, int *, double *, double *, int *); - -int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *, - float *, float *, int *); -int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); -int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *, - double *, double *, int *); - -int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, 
double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *, - float *, int *, float *, float *, int *); -int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); -int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *, - double*, int *, double *, double *, int *); - -int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, - double *, int *); -int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, - float *, int *); -int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, - double *, int *); - -int BLASFUNC(sgema)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgema)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); -int BLASFUNC(cgema)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgema)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); - -int BLASFUNC(sgems)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(dgems)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); -int BLASFUNC(cgems)(char *, char *, int *, int *, float *, - float *, int *, float *, float *, int *, float *, int *); -int BLASFUNC(zgems)(char *, char *, int *, int *, double *, - double *, int *, double*, double *, int *, double*, int *); - -int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); -int 
BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); - -int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); -int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); -int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); - -int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); -int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); -int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); -int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); - -int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); -int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); - -int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *); -int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *); 
-int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); -int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *); -int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *); - -int BLASFUNC(spotf2)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); - -int BLASFUNC(spotrf)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); - -int BLASFUNC(slauu2)(char *, int *, float *, int *, int *); -int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(clauu2)(char *, int *, float *, int *, int *); -int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); -int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); - -int BLASFUNC(slauum)(char *, int *, float *, int *, int *); -int BLASFUNC(dlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(qlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(clauum)(char *, int *, float *, int *, int *); -int BLASFUNC(zlauum)(char *, int *, double *, int *, int *); -int BLASFUNC(xlauum)(char *, int *, double *, int *, int *); - -int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(ctrti2)(char *, char *, int *, float 
*, int *, int *); -int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); - -int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); -int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); -int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); - -int BLASFUNC(spotri)(char *, int *, float *, int *, int *); -int BLASFUNC(dpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(qpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); -int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); -int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/thirdparty/eigen-3.2.10/Eigen/src/plugins/CMakeLists.txt b/thirdparty/eigen-3.2.10/Eigen/src/plugins/CMakeLists.txt index 1a1d3ffbd..e69de29bb 100644 --- a/thirdparty/eigen-3.2.10/Eigen/src/plugins/CMakeLists.txt +++ b/thirdparty/eigen-3.2.10/Eigen/src/plugins/CMakeLists.txt @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_plugins_SRCS "*.h") - -INSTALL(FILES - ${Eigen_plugins_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/plugins COMPONENT Devel - ) diff --git a/thirdparty/eigen/.hgeol b/thirdparty/eigen/.hgeol new file mode 100644 index 000000000..5327df161 --- /dev/null +++ b/thirdparty/eigen/.hgeol @@ -0,0 +1,11 @@ +[patterns] +*.sh = LF +*.MINPACK = CRLF +scripts/*.in = LF +debug/msvc/*.dat = CRLF +debug/msvc/*.natvis = CRLF +unsupported/test/mpreal/*.* = CRLF +** = native + +[repository] +native = LF diff --git a/thirdparty/eigen/.hgignore b/thirdparty/eigen/.hgignore new file mode 100644 index 000000000..769a47f1f --- /dev/null +++ b/thirdparty/eigen/.hgignore @@ -0,0 +1,34 @@ 
+syntax: glob +qrc_*cxx +*.orig +*.pyc +*.diff +diff +*.save +save +*.old +*.gmo +*.qm +core +core.* +*.bak +*~ +build* +*.moc.* +*.moc +ui_* +CMakeCache.txt +tags +.*.swp +activity.png +*.out +*.php* +*.log +*.orig +*.rej +log +patch +a +a.* +lapack/testing +lapack/reference diff --git a/thirdparty/eigen/.hgtags b/thirdparty/eigen/.hgtags new file mode 100644 index 000000000..7036de122 --- /dev/null +++ b/thirdparty/eigen/.hgtags @@ -0,0 +1,29 @@ +2db9468678c6480c9633b6272ff0e3599d1e11a3 2.0-beta3 +375224817dce669b6fa31d920d4c895a63fabf32 2.0-beta1 +3b8120f077865e2a072e10f5be33e1d942b83a06 2.0-rc1 +19dfc0e7666bcee26f7a49eb42f39a0280a3485e 2.0-beta5 +7a7d8a9526f003ffa2430dfb0c2c535b5add3023 2.0-beta4 +7d14ad088ac23769c349518762704f0257f6a39b 2.0.1 +b9d48561579fd7d4c05b2aa42235dc9de6484bf2 2.0-beta6 +e17630a40408243cb1a51ad0fe3a99beb75b7450 before-hg-migration +eda654d4cda2210ce80719addcf854773e6dec5a 2.0.0 +ee9a7c468a9e73fab12f38f02bac24b07f29ed71 2.0-beta2 +d49097c25d8049e730c254a2fed725a240ce4858 after-hg-migration +655348878731bcb5d9bbe0854077b052e75e5237 actual-start-from-scratch +12a658962d4e6dfdc9a1c350fe7b69e36e70675c 3.0-beta1 +5c4180ad827b3f869b13b1d82f5a6ce617d6fcee 3.0-beta2 +7ae24ca6f3891d5ac58ddc7db60ad413c8d6ec35 3.0-beta3 +c40708b9088d622567fecc9208ad4a426621d364 3.0-beta4 +b6456624eae74f49ae8683d8e7b2882a2ca0342a 3.0-rc1 +a810d5dbab47acfe65b3350236efdd98f67d4d8a 3.1.0-alpha1 +304c88ca3affc16dd0b008b1104873986edd77af 3.1.0-alpha2 +920fc730b5930daae0a6dbe296d60ce2e3808215 3.1.0-beta1 +8383e883ebcc6f14695ff0b5e20bb631abab43fb 3.1.0-rc1 +bf4cb8c934fa3a79f45f1e629610f0225e93e493 3.1.0-rc2 +da195914abcc1d739027cbee7c52077aab30b336 3.2-beta1 +a8e0d153fc5e239ef8b06e3665f1f9e8cb8d49c8 before-evaluators +09a8e21866106b49c5dec1d6d543e5794e82efa0 3.3-alpha1 +ce5a455b34c0a0ac3545a1497cb4a16c38ed90e8 3.3-beta1 +69d418c0699907bcd0bf9e0b3ba0a112ed091d85 3.3-beta2 +bef509908b9da05d0d07ffc0da105e2c8c6d3996 3.3-rc1 +04ab5fa4b241754afcf631117572276444c67239 3.3-rc2 
diff --git a/thirdparty/eigen/CMakeLists.txt b/thirdparty/eigen/CMakeLists.txt new file mode 100644 index 000000000..f38e22973 --- /dev/null +++ b/thirdparty/eigen/CMakeLists.txt @@ -0,0 +1,525 @@ +project(Eigen3) + +cmake_minimum_required(VERSION 2.8.5) + +# guard against in-source builds + +if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) + message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") +endif() + +# Alias Eigen_*_DIR to Eigen3_*_DIR: + +set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) +set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) + +# guard against bad build-type strings + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +string(TOLOWER "${CMAKE_BUILD_TYPE}" cmake_build_type_tolower) +if( NOT cmake_build_type_tolower STREQUAL "debug" + AND NOT cmake_build_type_tolower STREQUAL "release" + AND NOT cmake_build_type_tolower STREQUAL "relwithdebinfo") + message(FATAL_ERROR "Unknown build type \"${CMAKE_BUILD_TYPE}\". 
Allowed values are Debug, Release, RelWithDebInfo (case-insensitive).") +endif() + + +############################################################################# +# retrieve version information # +############################################################################# + +# automatically parse the version number +file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header) +string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") +set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}") +string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") +set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}") +string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}") +set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}") +set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}) + +# if the mercurial program is absent, this will leave the EIGEN_HG_CHANGESET string empty, +# but won't stop CMake. +execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT) +execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT) + +# if this is the default (aka development) branch, extract the mercurial changeset number from the hg tip output... 
+if(EIGEN_BRANCH_OUTPUT MATCHES "default") +string(REGEX MATCH "^changeset: *[0-9]*:([0-9;a-f]+).*" EIGEN_HG_CHANGESET_MATCH "${EIGEN_HGTIP_OUTPUT}") +set(EIGEN_HG_CHANGESET "${CMAKE_MATCH_1}") +endif(EIGEN_BRANCH_OUTPUT MATCHES "default") +#...and show it next to the version number +if(EIGEN_HG_CHANGESET) + set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (mercurial changeset ${EIGEN_HG_CHANGESET})") +else(EIGEN_HG_CHANGESET) + set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}") +endif(EIGEN_HG_CHANGESET) + + +include(CheckCXXCompilerFlag) +include(GNUInstallDirs) + +set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) + +############################################################################# +# find how to link to the standard libraries # +############################################################################# + +find_package(StandardMathLibrary) + + +set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.") +set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.") + +set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "") + +if(NOT STANDARD_MATH_LIBRARY_FOUND) + + message(FATAL_ERROR + "Can't link to the standard math library. 
Please report to the Eigen developers, telling them about your platform.") + +else() + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}") + else() + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}") + endif() + +endif() + +if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}") +else() + message(STATUS "Standard libraries to link to explicitly: none") +endif() + +option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) + +# Disable pkgconfig only for native Windows builds +if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) + option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON) +endif() + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON) + +option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF) +if(EIGEN_DEFAULT_TO_ROW_MAJOR) + add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR") +endif() + +set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320") + +macro(ei_add_cxx_compiler_flag FLAG) + string(REGEX REPLACE "-" "" SFLAG1 ${FLAG}) + string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1}) + check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG}) + if(COMPILER_SUPPORT_${SFLAG}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}") + endif() +endmacro(ei_add_cxx_compiler_flag) + +if(NOT MSVC) + # We assume that other compilers are partly compatible with GNUCC + + # clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag + # adding -Werror turns such warnings into errors + check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR) + if(COMPILER_SUPPORT_WERROR) + set(CMAKE_REQUIRED_FLAGS "-Werror") + endif() + ei_add_cxx_compiler_flag("-pedantic") + ei_add_cxx_compiler_flag("-Wall") + 
ei_add_cxx_compiler_flag("-Wextra") + #ei_add_cxx_compiler_flag("-Weverything") # clang + + ei_add_cxx_compiler_flag("-Wundef") + ei_add_cxx_compiler_flag("-Wcast-align") + ei_add_cxx_compiler_flag("-Wchar-subscripts") + ei_add_cxx_compiler_flag("-Wnon-virtual-dtor") + ei_add_cxx_compiler_flag("-Wunused-local-typedefs") + ei_add_cxx_compiler_flag("-Wpointer-arith") + ei_add_cxx_compiler_flag("-Wwrite-strings") + ei_add_cxx_compiler_flag("-Wformat-security") + ei_add_cxx_compiler_flag("-Wshorten-64-to-32") + ei_add_cxx_compiler_flag("-Wlogical-op") + ei_add_cxx_compiler_flag("-Wenum-conversion") + ei_add_cxx_compiler_flag("-Wc++11-extensions") + ei_add_cxx_compiler_flag("-Wdouble-promotion") +# ei_add_cxx_compiler_flag("-Wconversion") + + # -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6 + # if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0")) + if(NOT CMAKE_COMPILER_IS_GNUCXX) + ei_add_cxx_compiler_flag("-Wshadow") + endif() + + ei_add_cxx_compiler_flag("-Wno-psabi") + ei_add_cxx_compiler_flag("-Wno-variadic-macros") + ei_add_cxx_compiler_flag("-Wno-long-long") + + ei_add_cxx_compiler_flag("-fno-check-new") + ei_add_cxx_compiler_flag("-fno-common") + ei_add_cxx_compiler_flag("-fstrict-aliasing") + ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark + ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor + + + # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fail + # Moreover we should not set both -strict-ansi and -ansi + check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI) + ei_add_cxx_compiler_flag("-Qunused-arguments") # disable clang warning: argument unused during compilation: '-ansi' + + if(COMPILER_SUPPORT_STRICTANSI) + 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi") + else() + ei_add_cxx_compiler_flag("-ansi") + endif() + + if(ANDROID_NDK) + ei_add_cxx_compiler_flag("-pie") + ei_add_cxx_compiler_flag("-fPIE") + endif() + + set(CMAKE_REQUIRED_FLAGS "") + + option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) + if(EIGEN_TEST_SSE2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") + message(STATUS "Enabling SSE2 in tests/examples") + endif() + + option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF) + if(EIGEN_TEST_SSE3) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") + message(STATUS "Enabling SSE3 in tests/examples") + endif() + + option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF) + if(EIGEN_TEST_SSSE3) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3") + message(STATUS "Enabling SSSE3 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_1) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") + message(STATUS "Enabling SSE4.1 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") + message(STATUS "Enabling SSE4.2 in tests/examples") + endif() + + option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF) + if(EIGEN_TEST_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + message(STATUS "Enabling AVX in tests/examples") + endif() + + option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF) + if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") + message(STATUS "Enabling FMA in tests/examples") + endif() + + option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF) + if(EIGEN_TEST_AVX512) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -fabi-version=6 -DEIGEN_ENABLE_AVX512") + message(STATUS "Enabling AVX512 in tests/examples") + endif() + + 
option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF) + if(EIGEN_TEST_F16C) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c") + message(STATUS "Enabling F16C in tests/examples") + endif() + + option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) + if(EIGEN_TEST_ALTIVEC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") + message(STATUS "Enabling AltiVec in tests/examples") + endif() + + option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF) + if(EIGEN_TEST_VSX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx") + message(STATUS "Enabling VSX in tests/examples") + endif() + + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON) + if(EIGEN_TEST_FMA) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon") + endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard") + message(STATUS "Enabling NEON in tests/examples") + endif() + + option(EIGEN_TEST_NEON64 "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON64) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + message(STATUS "Enabling NEON in tests/examples") + endif() + + option(EIGEN_TEST_ZVECTOR "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF) + if(EIGEN_TEST_ZVECTOR) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector") + message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples") + endif() + + check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) + if(COMPILER_SUPPORT_OPENMP) + option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) + if(EIGEN_TEST_OPENMP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + message(STATUS "Enabling OpenMP in tests/examples") + endif() + endif() + +else(NOT MSVC) + + # C4127 - conditional expression is constant + # C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively) + # We can disable this warning in the unit 
tests since it is clear that it occurs + # because we are oftentimes returning objects that have a destructor or may + # throw exceptions - in particular in the unit tests we are throwing extra many + # exceptions to cover indexing errors. + # C4505 - unreferenced local function has been removed (impossible to deactivate selectively) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /wd4127 /wd4505 /wd4714") + + # replace all /Wx by /W4 + string(REGEX REPLACE "/W[0-9]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + check_cxx_compiler_flag("/openmp" COMPILER_SUPPORT_OPENMP) + if(COMPILER_SUPPORT_OPENMP) + option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) + if(EIGEN_TEST_OPENMP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") + message(STATUS "Enabling OpenMP in tests/examples") + endif() + endif() + + option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) + if(EIGEN_TEST_SSE2) + if(NOT CMAKE_CL_64) + # arch is not supported on 64 bit systems, SSE is enabled automatically. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") + endif(NOT CMAKE_CL_64) + message(STATUS "Enabling SSE2 in tests/examples") + endif(EIGEN_TEST_SSE2) +endif(NOT MSVC) + +option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF) +option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF) +option(EIGEN_TEST_32BIT "Force generating 32bit code." 
OFF) + +if(EIGEN_TEST_X87) + set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON) + if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387") + message(STATUS "Forcing use of x87 instructions in tests/examples") + else() + message(STATUS "EIGEN_TEST_X87 ignored on your compiler") + endif() +endif() + +if(EIGEN_TEST_32BIT) + if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") + message(STATUS "Forcing generation of 32-bit code in tests/examples") + else() + message(STATUS "EIGEN_TEST_32BIT ignored on your compiler") + endif() +endif() + +if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + add_definitions(-DEIGEN_DONT_VECTORIZE=1) + message(STATUS "Disabling vectorization in tests/examples") +endif() + +option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF) +if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) + add_definitions(-DEIGEN_DONT_ALIGN=1) + message(STATUS "Disabling alignment in tests/examples") +endif() + +option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF) +if(EIGEN_TEST_NO_EXCEPTIONS) + ei_add_cxx_compiler_flag("-fno-exceptions") + message(STATUS "Disabling exceptions in tests/examples") +endif() + +option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF) + +set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code") + +include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + +# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR +if(EIGEN_INCLUDE_INSTALL_DIR) + message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. 
Use INCLUDE_INSTALL_DIR instead.") +endif() + +if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR) + set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed") +else() + set(INCLUDE_INSTALL_DIR + "${CMAKE_INSTALL_INCLUDEDIR}/eigen3" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed" + ) +endif() +set(CMAKEPACKAGE_INSTALL_DIR + "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed" + ) +set(PKGCONFIG_INSTALL_DIR + "${CMAKE_INSTALL_DATADIR}/pkgconfig" + CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed" + ) + + +# similar to set_target_properties but append the property instead of overwriting it +macro(ei_add_target_property target prop value) + + get_target_property(previous ${target} ${prop}) + # if the property wasn't previously set, ${previous} is now "previous-NOTFOUND" which cmake allows catching with plain if() + if(NOT previous) + set(previous "") + endif(NOT previous) + set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}") +endmacro(ei_add_target_property) + +install(FILES + signature_of_eigen3_matrix_library + DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel + ) + +if(EIGEN_BUILD_PKGCONFIG) + configure_file(eigen3.pc.in eigen3.pc @ONLY) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc + DESTINATION ${PKGCONFIG_INSTALL_DIR} + ) +endif() + +add_subdirectory(Eigen) + +add_subdirectory(doc EXCLUDE_FROM_ALL) + +include(EigenConfigureTesting) + +# fixme, not sure this line is still needed: +enable_testing() # must be called from the root CMakeLists, see man page + + +if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest +else() + add_subdirectory(test EXCLUDE_FROM_ALL) +endif() + +if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + 
add_subdirectory(blas) + add_subdirectory(lapack) +else() + add_subdirectory(blas EXCLUDE_FROM_ALL) + add_subdirectory(lapack EXCLUDE_FROM_ALL) +endif() + +# add SYCL +option(EIGEN_TEST_SYCL "Add Sycl support." OFF) +if(EIGEN_TEST_SYCL) + set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}") + include(FindComputeCpp) +endif() + +add_subdirectory(unsupported) + +add_subdirectory(demos EXCLUDE_FROM_ALL) + +# must be after test and unsupported, for configuring buildtests.in +add_subdirectory(scripts EXCLUDE_FROM_ALL) + +# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"? +if(EIGEN_BUILD_BTL) + add_subdirectory(bench/btl EXCLUDE_FROM_ALL) +endif(EIGEN_BUILD_BTL) + +if(NOT WIN32) + add_subdirectory(bench/spbench EXCLUDE_FROM_ALL) +endif(NOT WIN32) + +configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY) + +ei_testing_print_summary() + +message(STATUS "") +message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}") +message(STATUS "") + +option(EIGEN_FAILTEST "Enable failtests." OFF) +if(EIGEN_FAILTEST) + add_subdirectory(failtest) +endif() + +string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower) +if(cmake_generator_tolower MATCHES "makefile") + message(STATUS "Some things you can do now:") + message(STATUS "--------------+--------------------------------------------------------------") + message(STATUS "Command | Description") + message(STATUS "--------------+--------------------------------------------------------------") + message(STATUS "make install | Install Eigen. Headers will be installed to:") + message(STATUS " | /") + message(STATUS " | Using the following values:") + message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") + message(STATUS " | INCLUDE_INSTALL_DIR: ${INCLUDE_INSTALL_DIR}") + message(STATUS " | Change the install location of Eigen headers using:") + message(STATUS " | cmake . 
-DCMAKE_INSTALL_PREFIX=yourprefix") + message(STATUS " | Or:") + message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir") + message(STATUS "make doc | Generate the API documentation, requires Doxygen & LaTeX") + message(STATUS "make check | Build and run the unit-tests. Read this page:") + message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") + message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)") + message(STATUS "make uninstall| Removes files installed by make install") + message(STATUS "--------------+--------------------------------------------------------------") +else() + message(STATUS "To build/run the unit tests, read this page:") + message(STATUS " http://eigen.tuxfamily.org/index.php?title=Tests") +endif() + +message(STATUS "") + + +set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} ) +set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} ) +set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} ) +set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} ) +set ( EIGEN_DEFINITIONS "") +set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" ) +set ( EIGEN_INCLUDE_DIRS ${EIGEN_INCLUDE_DIR} ) +set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} ) + +configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + @ONLY ESCAPE_QUOTES + ) + +install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} + ) + +# Add uninstall target +add_custom_target ( uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake) diff --git a/thirdparty/eigen/COPYING.BSD b/thirdparty/eigen/COPYING.BSD new file mode 100644 index 000000000..11971ffe2 --- /dev/null +++ b/thirdparty/eigen/COPYING.BSD @@ -0,0 +1,26 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ \ No newline at end of file diff --git a/thirdparty/eigen/COPYING.GPL b/thirdparty/eigen/COPYING.GPL new file mode 100644 index 000000000..94a9ed024 --- /dev/null +++ b/thirdparty/eigen/COPYING.GPL @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/thirdparty/eigen/COPYING.LGPL b/thirdparty/eigen/COPYING.LGPL new file mode 100644 index 000000000..4362b4915 --- /dev/null +++ b/thirdparty/eigen/COPYING.LGPL @@ -0,0 +1,502 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. 
These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it!
diff --git a/thirdparty/eigen/COPYING.MINPACK b/thirdparty/eigen/COPYING.MINPACK new file mode 100644 index 000000000..11d8a9a6c --- /dev/null +++ b/thirdparty/eigen/COPYING.MINPACK @@ -0,0 +1,52 @@ +Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above +copyright notice, this list of conditions and the following +disclaimer. + +2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials +provided with the distribution. + +3. The end-user documentation included with the +redistribution, if any, must include the following +acknowledgment: + + "This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory. + +Alternately, this acknowledgment may appear in the software +itself, if and wherever such third-party acknowledgments +normally appear. + +4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE +UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +BE CORRECTED. + +5. LIMITATION OF LIABILITY. 
IN NO EVENT WILL THE COPYRIGHT +HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +POSSIBILITY OF SUCH LOSS OR DAMAGES. + diff --git a/thirdparty/eigen/COPYING.MPL2 b/thirdparty/eigen/COPYING.MPL2 new file mode 100644 index 000000000..14e2f777f --- /dev/null +++ b/thirdparty/eigen/COPYING.MPL2 @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. 
"Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/thirdparty/eigen/COPYING.README b/thirdparty/eigen/COPYING.README new file mode 100644 index 000000000..de5b63215 --- /dev/null +++ b/thirdparty/eigen/COPYING.README @@ -0,0 +1,18 @@ +Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: + http://www.mozilla.org/MPL/2.0/ + http://www.mozilla.org/MPL/2.0/FAQ.html + +Some files contain third-party code under BSD or LGPL licenses, whence the other +COPYING.* files here. + +All the LGPL code is either LGPL 2.1-only, or LGPL 2.1-or-later. +For this reason, the COPYING.LGPL file contains the LGPL 2.1 text. + +If you want to guarantee that the Eigen code that you are #including is licensed +under the MPL2 and possibly more permissive licenses (like BSD), #define this +preprocessor symbol: + EIGEN_MPL2_ONLY +For example, with most compilers, you could add this to your project CXXFLAGS: + -DEIGEN_MPL2_ONLY +This will cause a compilation error to be generated if you #include any code that is +LGPL licensed. diff --git a/thirdparty/eigen/CTestConfig.cmake b/thirdparty/eigen/CTestConfig.cmake new file mode 100644 index 000000000..4c0027824 --- /dev/null +++ b/thirdparty/eigen/CTestConfig.cmake @@ -0,0 +1,17 @@ +## This file should be placed in the root directory of your project. +## Then modify the CMakeLists.txt file in the root directory of your +## project to incorporate the testing dashboard. 
+## # The following are required to uses Dart and the Cdash dashboard +## ENABLE_TESTING() +## INCLUDE(CTest) +set(CTEST_PROJECT_NAME "Eigen") +set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC") + +set(CTEST_DROP_METHOD "http") +set(CTEST_DROP_SITE "manao.inria.fr") +set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen") +set(CTEST_DROP_SITE_CDASH TRUE) +set(CTEST_PROJECT_SUBPROJECTS +Official +Unsupported +) diff --git a/thirdparty/eigen/CTestCustom.cmake.in b/thirdparty/eigen/CTestCustom.cmake.in new file mode 100644 index 000000000..9fed9d327 --- /dev/null +++ b/thirdparty/eigen/CTestCustom.cmake.in @@ -0,0 +1,3 @@ + +set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "2000") +set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "2000") diff --git a/thirdparty/eigen/Eigen/CMakeLists.txt b/thirdparty/eigen/Eigen/CMakeLists.txt new file mode 100644 index 000000000..9eb502b79 --- /dev/null +++ b/thirdparty/eigen/Eigen/CMakeLists.txt @@ -0,0 +1,19 @@ +include(RegexUtils) +test_escape_string_as_regex() + +file(GLOB Eigen_directory_files "*") + +escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") + +foreach(f ${Eigen_directory_files}) + if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src") + list(APPEND Eigen_directory_files_to_install ${f}) + endif() +endforeach(f ${Eigen_directory_files}) + +install(FILES + ${Eigen_directory_files_to_install} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/thirdparty/eigen/Eigen/Cholesky b/thirdparty/eigen/Eigen/Cholesky new file mode 100644 index 000000000..369d1f5ec --- /dev/null +++ b/thirdparty/eigen/Eigen/Cholesky @@ -0,0 +1,41 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLESKY_MODULE_H +#define EIGEN_CHOLESKY_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Cholesky_Module Cholesky module + * + * + * + * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are also accessible via the following methods: + * - MatrixBase::llt() + * - MatrixBase::ldlt() + * - SelfAdjointView::llt() + * - SelfAdjointView::ldlt() + * + * \code + * #include + * \endcode + */ + +#include "src/Cholesky/LLT.h" +#include "src/Cholesky/LDLT.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/Cholesky/LLT_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLESKY_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/CholmodSupport b/thirdparty/eigen/Eigen/CholmodSupport new file mode 100644 index 000000000..bed8924d3 --- /dev/null +++ b/thirdparty/eigen/Eigen/CholmodSupport @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H +#define EIGEN_CHOLMODSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { + #include +} + +/** \ingroup Support_modules + * \defgroup CholmodSupport_Module CholmodSupport module + * + * This module provides an interface to the Cholmod library which is part of the suitesparse package. 
+ * It provides the two following main factorization classes: + * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. + * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial). + * + * For the sake of completeness, this module also proposes the two following classes: + * - class CholmodSimplicialLLT + * - class CholmodSimplicialLDLT + * Note that these classes do not bring any particular advantage compared to the built-in + * SimplicialLLT and SimplicialLDLT factorization classes. + * + * \code + * #include <Eigen/CholmodSupport> + * \endcode + * + * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be linked to the cholmod library and its dependencies. + * The dependencies depend on how cholmod has been compiled. + * For a cmake based project, you can use our FindCholmod.cmake module to help you in this task. + * + */ + +#include "src/CholmodSupport/CholmodSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLMODSUPPORT_MODULE_H + diff --git a/thirdparty/eigen/Eigen/Core b/thirdparty/eigen/Eigen/Core new file mode 100644 index 000000000..444c1c8d7 --- /dev/null +++ b/thirdparty/eigen/Eigen/Core @@ -0,0 +1,519 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +#ifndef EIGEN_CORE_H +#define EIGEN_CORE_H + +// first thing Eigen does: stop the compiler from committing suicide +#include "src/Core/util/DisableStupidWarnings.h" + +// Handle NVCC/CUDA/SYCL +#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) + // Do not try asserts on CUDA and SYCL! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + // All functions callable from CUDA code must be qualified with __device__ + #ifdef __CUDACC__ + // Do not try to vectorize on CUDA and SYCL! + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #define EIGEN_DEVICE_FUNC __host__ __device__ + // We need math_functions.hpp to ensure that the EIGEN_USING_STD_MATH macro + // works properly on the device side + #include <math_functions.hpp> + #else + #define EIGEN_DEVICE_FUNC + #endif +#else + #define EIGEN_DEVICE_FUNC +#endif + +// When compiling CUDA device code with NVCC, pull in math functions from the +// global namespace. In host mode, and when device code is compiled with clang, use the +// std versions. +#if defined(__CUDA_ARCH__) && defined(__NVCC__) + #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) + #define EIGEN_EXCEPTIONS +#endif + +#ifdef EIGEN_EXCEPTIONS + #include <new> +#endif + +// then include this file where all our macros are defined. It's really important to do it first because +// it's where we do all the alignment settings (platform detection and honoring the user's will if he +// defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
+#include "src/Core/util/Macros.h" + +// Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) +// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) + #pragma GCC optimize ("-fno-ipa-cp-clone") +#endif + +#include + +// this include file manages BLAS and MKL related macros +// and inclusion of their respective header files +#include "src/Core/util/MKL_support.h" + +// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif +#endif + +#if EIGEN_COMP_MSVC + #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later + // Remember that usage of defined() in a #define is undefined by the standard. + // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 + #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER + #endif + #endif +#else + // Remember that usage of defined() in a #define is undefined by the standard + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) + #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC + #endif +#endif + +#ifndef EIGEN_DONT_VECTORIZE + + #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER) + + // Defines symbols for compile-time detection of which instructions are + // used. + // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_SSE + #define EIGEN_VECTORIZE_SSE2 + + // Detect sse3/ssse3/sse4: + // gcc and icc defines __SSE3__, ... 
+ // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you + // want to force the use of those instructions with msvc. + #ifdef __SSE3__ + #define EIGEN_VECTORIZE_SSE3 + #endif + #ifdef __SSSE3__ + #define EIGEN_VECTORIZE_SSSE3 + #endif + #ifdef __SSE4_1__ + #define EIGEN_VECTORIZE_SSE4_1 + #endif + #ifdef __SSE4_2__ + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX__ + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #define EIGEN_VECTORIZE_AVX2 + #endif + #ifdef __FMA__ + #define EIGEN_VECTORIZE_FMA + #endif + #if defined(__AVX512F__) + #define EIGEN_VECTORIZE_AVX512 + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_FMA + #ifdef __AVX512DQ__ + #define EIGEN_VECTORIZE_AVX512DQ + #endif + #endif + + // include files + + // This extern "C" works around a MINGW-w64 compilation issue + // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354 + // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do). + // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations + // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know; + // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. + // notice that since these are C headers, the extern "C" is theoretically needed anyways. + extern "C" { + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. 
However old gcc versions seems to not have this file, thus: + #if EIGEN_COMP_ICC >= 1110 + #include + #else + #include + #include + #include + #ifdef EIGEN_VECTORIZE_SSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include + #endif + #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) + #include + #endif + #endif + } // end extern "C" + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + #elif defined __ALTIVEC__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ALTIVEC + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel + #elif (defined __ARM_NEON) || (defined __ARM_NEON__) + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_NEON + #include + #elif (defined __s390x__ && defined __VEC__) + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ZVECTOR + #include + #endif +#endif + +#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG) + // We can use the optimized fp16 to float and float to fp16 conversion routines + #define EIGEN_HAS_FP16_C +#endif + +#if defined __CUDACC__ + #define EIGEN_VECTORIZE_CUDA + #include + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #define EIGEN_HAS_CUDA_FP16 + #endif +#endif + +#if defined EIGEN_HAS_CUDA_FP16 + #include + #include +#endif + +#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) + #define EIGEN_HAS_OPENMP +#endif + +#ifdef EIGEN_HAS_OPENMP +#include +#endif + +// MSVC for windows mobile does not have the errno.h file +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM +#define EIGEN_HAS_ERRNO +#endif + +#ifdef EIGEN_HAS_ERRNO +#include +#endif +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include // for CHAR_BIT +// for min/max: +#include + +// for std::is_nothrow_move_assignable +#ifdef EIGEN_INCLUDE_TYPE_TRAITS +#include +#endif + +// for outputting debug info +#ifdef EIGEN_DEBUG_ASSIGN +#include +#endif + +// required for __cpuid, needs to be included after cmath +#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE + #include +#endif + +#if defined(__SYCL_DEVICE_ONLY__) + #undef min + #undef max + #undef isnan + #undef isinf + #undef isfinite + #include +#endif + +/** \brief Namespace containing all symbols from the %Eigen library. */ +namespace Eigen { + +inline static const char *SimdInstructionSetsInUse(void) { +#if defined(EIGEN_VECTORIZE_AVX512) + return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_1) + return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; +#elif defined(EIGEN_VECTORIZE_SSSE3) + return "SSE, SSE2, SSE3, SSSE3"; +#elif defined(EIGEN_VECTORIZE_SSE3) + return "SSE, SSE2, SSE3"; +#elif defined(EIGEN_VECTORIZE_SSE2) + return "SSE, SSE2"; +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; +#elif defined(EIGEN_VECTORIZE_NEON) + return "ARM NEON"; +#elif defined(EIGEN_VECTORIZE_ZVECTOR) + return "S390X ZVECTOR"; +#else + return "None"; +#endif +} + +} // end namespace Eigen + +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT +// This will generate an error message: +#error Eigen2-support is only available up to version 3.2. 
Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information +#endif + +// we use size_t frequently and we'll never remember to prepend it with std:: everytime just to +// ensure QNX/QCC support +using std::size_t; +// gcc 4.6.0 wants std:: for ptrdiff_t +using std::ptrdiff_t; + +/** \defgroup Core_Module Core module + * This is the main module of Eigen providing dense matrix and vector support + * (both fixed and dynamic size) with all the features corresponding to a BLAS library + * and much more... + * + * \code + * #include + * \endcode + */ + +#include "src/Core/util/Constants.h" +#include "src/Core/util/Meta.h" +#include "src/Core/util/ForwardDeclarations.h" +#include "src/Core/util/StaticAssert.h" +#include "src/Core/util/XprHelper.h" +#include "src/Core/util/Memory.h" + +#include "src/Core/NumTraits.h" +#include "src/Core/MathFunctions.h" +#include "src/Core/GenericPacketMath.h" +#include "src/Core/MathFunctionsImpl.h" + +#if defined EIGEN_VECTORIZE_AVX512 + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX512/PacketMath.h" + #include "src/Core/arch/AVX512/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_AVX + // Use AVX for floats and doubles, SSE for integers + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX/MathFunctions.h" + #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/AVX/TypeCasting.h" +#elif defined EIGEN_VECTORIZE_SSE + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/TypeCasting.h" +#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) + #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/MathFunctions.h" + #include 
"src/Core/arch/AltiVec/Complex.h" +#elif defined EIGEN_VECTORIZE_NEON + #include "src/Core/arch/NEON/PacketMath.h" + #include "src/Core/arch/NEON/MathFunctions.h" + #include "src/Core/arch/NEON/Complex.h" +#elif defined EIGEN_VECTORIZE_ZVECTOR + #include "src/Core/arch/ZVector/PacketMath.h" + #include "src/Core/arch/ZVector/MathFunctions.h" + #include "src/Core/arch/ZVector/Complex.h" +#endif + +// Half float support +#include "src/Core/arch/CUDA/Half.h" +#include "src/Core/arch/CUDA/PacketMathHalf.h" +#include "src/Core/arch/CUDA/TypeCasting.h" + +#if defined EIGEN_VECTORIZE_CUDA + #include "src/Core/arch/CUDA/PacketMath.h" + #include "src/Core/arch/CUDA/MathFunctions.h" +#endif + +#include "src/Core/arch/Default/Settings.h" + +#include "src/Core/functors/TernaryFunctors.h" +#include "src/Core/functors/BinaryFunctors.h" +#include "src/Core/functors/UnaryFunctors.h" +#include "src/Core/functors/NullaryFunctors.h" +#include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" + +// Specialized functors to enable the processing of complex numbers +// on CUDA devices +#include "src/Core/arch/CUDA/Complex.h" + +#include "src/Core/DenseCoeffsBase.h" +#include "src/Core/DenseBase.h" +#include "src/Core/MatrixBase.h" +#include "src/Core/EigenBase.h" + +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" + +#ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 + // at least confirmed with Doxygen 1.5.5 and 1.5.6 + #include "src/Core/Assign.h" +#endif + +#include "src/Core/ArrayBase.h" +#include "src/Core/util/BlasUtil.h" +#include "src/Core/DenseStorage.h" +#include "src/Core/NestByValue.h" + +// #include "src/Core/ForceAlignedAccess.h" + +#include "src/Core/ReturnByValue.h" +#include "src/Core/NoAlias.h" +#include "src/Core/PlainObjectBase.h" +#include "src/Core/Matrix.h" +#include "src/Core/Array.h" +#include "src/Core/CwiseTernaryOp.h" +#include 
"src/Core/CwiseBinaryOp.h" +#include "src/Core/CwiseUnaryOp.h" +#include "src/Core/CwiseNullaryOp.h" +#include "src/Core/CwiseUnaryView.h" +#include "src/Core/SelfCwiseBinaryOp.h" +#include "src/Core/Dot.h" +#include "src/Core/StableNorm.h" +#include "src/Core/Stride.h" +#include "src/Core/MapBase.h" +#include "src/Core/Map.h" +#include "src/Core/Ref.h" +#include "src/Core/Block.h" +#include "src/Core/VectorBlock.h" +#include "src/Core/Transpose.h" +#include "src/Core/DiagonalMatrix.h" +#include "src/Core/Diagonal.h" +#include "src/Core/DiagonalProduct.h" +#include "src/Core/Redux.h" +#include "src/Core/Visitor.h" +#include "src/Core/Fuzzy.h" +#include "src/Core/IO.h" +#include "src/Core/Swap.h" +#include "src/Core/CommaInitializer.h" +#include "src/Core/GeneralProduct.h" +#include "src/Core/Solve.h" +#include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" +#include "src/Core/TriangularMatrix.h" +#include "src/Core/SelfAdjointView.h" +#include "src/Core/products/GeneralBlockPanelKernel.h" +#include "src/Core/products/Parallelizer.h" +#include "src/Core/ProductEvaluators.h" +#include "src/Core/products/GeneralMatrixVector.h" +#include "src/Core/products/GeneralMatrixMatrix.h" +#include "src/Core/SolveTriangular.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular.h" +#include "src/Core/products/SelfadjointMatrixVector.h" +#include "src/Core/products/SelfadjointMatrixMatrix.h" +#include "src/Core/products/SelfadjointProduct.h" +#include "src/Core/products/SelfadjointRank2Update.h" +#include "src/Core/products/TriangularMatrixVector.h" +#include "src/Core/products/TriangularMatrixMatrix.h" +#include "src/Core/products/TriangularSolverMatrix.h" +#include "src/Core/products/TriangularSolverVector.h" +#include "src/Core/BandMatrix.h" +#include "src/Core/CoreIterators.h" +#include "src/Core/ConditionEstimator.h" + +#include "src/Core/BooleanRedux.h" +#include 
"src/Core/Select.h" +#include "src/Core/VectorwiseOp.h" +#include "src/Core/Random.h" +#include "src/Core/Replicate.h" +#include "src/Core/Reverse.h" +#include "src/Core/ArrayWrapper.h" + +#ifdef EIGEN_USE_BLAS +#include "src/Core/products/GeneralMatrixMatrix_BLAS.h" +#include "src/Core/products/GeneralMatrixVector_BLAS.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h" +#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h" +#include "src/Core/products/SelfadjointMatrixVector_BLAS.h" +#include "src/Core/products/TriangularMatrixMatrix_BLAS.h" +#include "src/Core/products/TriangularMatrixVector_BLAS.h" +#include "src/Core/products/TriangularSolverMatrix_BLAS.h" +#endif // EIGEN_USE_BLAS + +#ifdef EIGEN_USE_MKL_VML +#include "src/Core/Assign_MKL.h" +#endif + +#include "src/Core/GlobalFunctions.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CORE_H diff --git a/thirdparty/eigen/Eigen/Dense b/thirdparty/eigen/Eigen/Dense new file mode 100644 index 000000000..5768910bd --- /dev/null +++ b/thirdparty/eigen/Eigen/Dense @@ -0,0 +1,7 @@ +#include "Core" +#include "LU" +#include "Cholesky" +#include "QR" +#include "SVD" +#include "Geometry" +#include "Eigenvalues" diff --git a/thirdparty/eigen/Eigen/Eigen b/thirdparty/eigen/Eigen/Eigen new file mode 100644 index 000000000..654c8dc63 --- /dev/null +++ b/thirdparty/eigen/Eigen/Eigen @@ -0,0 +1,2 @@ +#include "Dense" +#include "Sparse" diff --git a/thirdparty/eigen/Eigen/Eigenvalues b/thirdparty/eigen/Eigen/Eigenvalues new file mode 100644 index 000000000..009e529e1 --- /dev/null +++ b/thirdparty/eigen/Eigen/Eigenvalues @@ -0,0 +1,57 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_EIGENVALUES_MODULE_H +#define EIGEN_EIGENVALUES_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" +#include "LU" +#include "Geometry" + +/** \defgroup Eigenvalues_Module Eigenvalues module + * + * + * + * This module mainly provides various eigenvalue solvers. + * This module also provides some MatrixBase methods, including: + * - MatrixBase::eigenvalues(), + * - MatrixBase::operatorNorm() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/RealSvd2x2.h" +#include "src/Eigenvalues/Tridiagonalization.h" +#include "src/Eigenvalues/RealSchur.h" +#include "src/Eigenvalues/EigenSolver.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver.h" +#include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h" +#include "src/Eigenvalues/HessenbergDecomposition.h" +#include "src/Eigenvalues/ComplexSchur.h" +#include "src/Eigenvalues/ComplexEigenSolver.h" +#include "src/Eigenvalues/RealQZ.h" +#include "src/Eigenvalues/GeneralizedEigenSolver.h" +#include "src/Eigenvalues/MatrixBaseEigenvalues.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/Eigenvalues/RealSchur_LAPACKE.h" +#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_EIGENVALUES_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/Geometry b/thirdparty/eigen/Eigen/Geometry new file mode 100644 index 000000000..716d52952 --- /dev/null +++ b/thirdparty/eigen/Eigen/Geometry @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_GEOMETRY_MODULE_H +#define EIGEN_GEOMETRY_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SVD" +#include "LU" +#include <limits> + +/** \defgroup Geometry_Module Geometry module + * + * This module provides support for: + * - fixed-size homogeneous transformations + * - translation, scaling, 2D and 3D rotations + * - \link Quaternion quaternions \endlink + * - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3) + * - orthogonal vector generation (\ref MatrixBase::unitOrthogonal) + * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink + * - \link AlignedBox axis aligned bounding boxes \endlink + * - \link umeyama least-square transformation fitting \endlink + * + * \code + * #include <Eigen/Geometry> + * \endcode + */ + +#include "src/Geometry/OrthoMethods.h" +#include "src/Geometry/EulerAngles.h" + +#include "src/Geometry/Homogeneous.h" +#include "src/Geometry/RotationBase.h" +#include "src/Geometry/Rotation2D.h" +#include "src/Geometry/Quaternion.h" +#include "src/Geometry/AngleAxis.h" +#include "src/Geometry/Transform.h" +#include "src/Geometry/Translation.h" +#include "src/Geometry/Scaling.h" +#include "src/Geometry/Hyperplane.h" +#include "src/Geometry/ParametrizedLine.h" +#include "src/Geometry/AlignedBox.h" +#include "src/Geometry/Umeyama.h" + +// Use the SSE optimized version whenever possible.
At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +#include "src/Geometry/arch/Geometry_SSE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_GEOMETRY_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/thirdparty/eigen/Eigen/Householder b/thirdparty/eigen/Eigen/Householder new file mode 100644 index 000000000..89cd81b1a --- /dev/null +++ b/thirdparty/eigen/Eigen/Householder @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_HOUSEHOLDER_MODULE_H +#define EIGEN_HOUSEHOLDER_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Householder_Module Householder module + * This module provides Householder transformations. + * + * \code + * #include + * \endcode + */ + +#include "src/Householder/Householder.h" +#include "src/Householder/HouseholderSequence.h" +#include "src/Householder/BlockHouseholder.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_HOUSEHOLDER_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/IterativeLinearSolvers b/thirdparty/eigen/Eigen/IterativeLinearSolvers new file mode 100644 index 000000000..957d5750b --- /dev/null +++ b/thirdparty/eigen/Eigen/IterativeLinearSolvers @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H +#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module + * + * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a square matrix, usually very large and sparse. + * Those solvers are accessible via the following classes: + * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-square problems, + * - BiCGSTAB for general square matrices. + * + * These iterative solvers are associated with some preconditioners: + * - IdentityPreconditioner - not really useful + * - DiagonalPreconditioner - also called Jacobi preconditioner, works very well on diagonally dominant matrices. + * - IncompleteLUT - incomplete LU factorization with dual thresholding + * + * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport.
+ * + \code + #include + \endcode + */ + +#include "src/IterativeLinearSolvers/SolveWithGuess.h" +#include "src/IterativeLinearSolvers/IterativeSolverBase.h" +#include "src/IterativeLinearSolvers/BasicPreconditioners.h" +#include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" +#include "src/IterativeLinearSolvers/BiCGSTAB.h" +#include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H diff --git a/thirdparty/eigen/Eigen/Jacobi b/thirdparty/eigen/Eigen/Jacobi new file mode 100644 index 000000000..17c1d785a --- /dev/null +++ b/thirdparty/eigen/Eigen/Jacobi @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_JACOBI_MODULE_H +#define EIGEN_JACOBI_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Jacobi_Module Jacobi module + * This module provides Jacobi and Givens rotations. + * + * \code + * #include + * \endcode + * + * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation: + * - MatrixBase::applyOnTheLeft() + * - MatrixBase::applyOnTheRight(). 
+ */ + +#include "src/Jacobi/Jacobi.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_JACOBI_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/thirdparty/eigen/Eigen/LU b/thirdparty/eigen/Eigen/LU new file mode 100644 index 000000000..6f6c55629 --- /dev/null +++ b/thirdparty/eigen/Eigen/LU @@ -0,0 +1,46 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LU_MODULE_H +#define EIGEN_LU_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup LU_Module LU module + * This module includes %LU decomposition and related notions such as matrix inversion and determinant. + * This module defines the following MatrixBase methods: + * - MatrixBase::inverse() + * - MatrixBase::determinant() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Kernel.h" +#include "src/misc/Image.h" +#include "src/LU/FullPivLU.h" +#include "src/LU/PartialPivLU.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/LU/PartialPivLU_LAPACKE.h" +#endif +#include "src/LU/Determinant.h" +#include "src/LU/InverseImpl.h" + +// Use the SSE optimized version whenever possible. 
At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX + #include "src/LU/arch/Inverse_SSE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_LU_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/MetisSupport b/thirdparty/eigen/Eigen/MetisSupport new file mode 100644 index 000000000..85c41bf34 --- /dev/null +++ b/thirdparty/eigen/Eigen/MetisSupport @@ -0,0 +1,35 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include + * \endcode + * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). + * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/thirdparty/eigen/Eigen/OrderingMethods b/thirdparty/eigen/Eigen/OrderingMethods new file mode 100644 index 000000000..d8ea36193 --- /dev/null +++ b/thirdparty/eigen/Eigen/OrderingMethods @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ORDERINGMETHODS_MODULE_H +#define EIGEN_ORDERINGMETHODS_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup OrderingMethods_Module OrderingMethods module + * + * This module is currently for internal use only + * + * It defines various built-in and external ordering methods for sparse matrices. + * They are typically used to reduce the number of elements during + * the sparse matrix decomposition (LLT, LU, QR). + * Precisely, in a preprocessing step, a permutation matrix P is computed using + * those ordering methods and applied to the columns of the matrix. + * Using for instance the sparse Cholesky decomposition, it is expected that + * the number of nonzero elements in LLT(A*P) will be much smaller than that in LLT(A). + * + * + * Usage : + * \code + * #include + * \endcode + * + * A simple usage is as a template parameter in the sparse decomposition classes : + * + * \code + * SparseLU > solver; + * \endcode + * + * \code + * SparseQR > solver; + * \endcode + * + * It is possible as well to call directly a particular ordering method for your own purpose, + * \code + * AMDOrdering ordering; + * PermutationMatrix perm; + * SparseMatrix A; + * //Fill the matrix ... + * + * ordering(A, perm); // Call AMD + * \endcode + * + * \note Some of these methods (like AMD or METIS), need the sparsity pattern + * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, + * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. + * If your matrix is already symmetric (at least in structure), you can avoid that + * by calling the method with a SelfAdjointView type. 
+ * + * \code + * // Call the ordering on the pattern of the lower triangular matrix A + * ordering(A.selfadjointView(), perm); + * \endcode + */ + +#ifndef EIGEN_MPL2_ONLY +#include "src/OrderingMethods/Amd.h" +#endif + +#include "src/OrderingMethods/Ordering.h" +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/thirdparty/eigen/Eigen/PaStiXSupport b/thirdparty/eigen/Eigen/PaStiXSupport new file mode 100644 index 000000000..de3a63b4d --- /dev/null +++ b/thirdparty/eigen/Eigen/PaStiXSupport @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PASTIXSUPPORT_MODULE_H +#define EIGEN_PASTIXSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +#include +} + +#ifdef complex +#undef complex +#endif + +/** \ingroup Support_modules + * \defgroup PaStiXSupport_Module PaStiXSupport module + * + * This module provides an interface to the PaSTiX library. + * PaSTiX is a general \b supernodal, \b parallel and \b opensource sparse solver. + * It provides the two following main factorization classes: + * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization. + * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization. + * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern). + * + * \code + * #include + * \endcode + * + * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be linked to the PaSTiX library and its dependencies. + * The dependencies depend on how PaSTiX has been compiled. 
+ * For a cmake based project, you can use our FindPaSTiX.cmake module to help you in this task. + * + */ + +#include "src/PaStiXSupport/PaStiXSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/thirdparty/eigen/Eigen/PardisoSupport b/thirdparty/eigen/Eigen/PardisoSupport new file mode 100755 index 000000000..340edf51f --- /dev/null +++ b/thirdparty/eigen/Eigen/PardisoSupport @@ -0,0 +1,35 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PARDISOSUPPORT_MODULE_H +#define EIGEN_PARDISOSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include + +/** \ingroup Support_modules + * \defgroup PardisoSupport_Module PardisoSupport module + * + * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers. + * + * \code + * #include + * \endcode + * + * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be linked to the MKL library and its dependencies. + * See this \ref TopicUsingIntelMKL "page" for more information on MKL-Eigen integration. + * + */ + +#include "src/PardisoSupport/PardisoSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/thirdparty/eigen/Eigen/QR b/thirdparty/eigen/Eigen/QR new file mode 100644 index 000000000..80838e3bd --- /dev/null +++ b/thirdparty/eigen/Eigen/QR @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_QR_MODULE_H +#define EIGEN_QR_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" + +/** \defgroup QR_Module QR module + * + * + * + * This module provides various QR decompositions + * This module also provides some MatrixBase methods, including: + * - MatrixBase::householderQr() + * - MatrixBase::colPivHouseholderQr() + * - MatrixBase::fullPivHouseholderQr() + * + * \code + * #include + * \endcode + */ + +#include "src/QR/HouseholderQR.h" +#include "src/QR/FullPivHouseholderQR.h" +#include "src/QR/ColPivHouseholderQR.h" +#include "src/QR/CompleteOrthogonalDecomposition.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke.h" +#include "src/QR/HouseholderQR_LAPACKE.h" +#include "src/QR/ColPivHouseholderQR_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_QR_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/QtAlignedMalloc b/thirdparty/eigen/Eigen/QtAlignedMalloc new file mode 100644 index 000000000..4044d5ac5 --- /dev/null +++ b/thirdparty/eigen/Eigen/QtAlignedMalloc @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_QTMALLOC_MODULE_H +#define EIGEN_QTMALLOC_MODULE_H + +#include "Core" + +#if (!EIGEN_MALLOC_ALREADY_ALIGNED) + +#include "src/Core/util/DisableStupidWarnings.h" +// Allocates `size` bytes through Eigen::internal::aligned_malloc and returns the block. +void *qMalloc(size_t size) +{ + return Eigen::internal::aligned_malloc(size); +} +// Releases `ptr` through Eigen::internal::aligned_free. +void qFree(void *ptr) +{ + Eigen::internal::aligned_free(ptr); +} +// Allocate-copy-free reallocation; a null `ptr` skips the copy and the free. NOTE(review): the old block size is not known here, so `size` bytes are copied from `ptr` -- confirm callers never request more than the old block holds. +void *qRealloc(void *ptr, size_t size) +{ + void* newPtr = Eigen::internal::aligned_malloc(size); + if (ptr) memcpy(newPtr, ptr, size); + if (ptr) Eigen::internal::aligned_free(ptr); + return newPtr; +} + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif + +#endif // EIGEN_QTMALLOC_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/SPQRSupport b/thirdparty/eigen/Eigen/SPQRSupport new file mode 100644 index 000000000..f70390c17 --- /dev/null +++ b/thirdparty/eigen/Eigen/SPQRSupport @@ -0,0 +1,34 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPQRSUPPORT_MODULE_H +#define EIGEN_SPQRSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SuiteSparseQR.hpp" + +/** \ingroup Support_modules + * \defgroup SPQRSupport_Module SuiteSparseQR module + * + * This module provides an interface to the SPQR library, which is part of the suitesparse package. + * + * \code + * #include + * \endcode + * + * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...). 
+ * For a cmake based project, you can use our FindSPQR.cmake and FindCholmod.Cmake modules + * + */ + +#include "src/CholmodSupport/CholmodSupport.h" +#include "src/SPQRSupport/SuiteSparseQRSupport.h" + +#endif diff --git a/thirdparty/eigen/Eigen/SVD b/thirdparty/eigen/Eigen/SVD new file mode 100644 index 000000000..86143c23d --- /dev/null +++ b/thirdparty/eigen/Eigen/SVD @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SVD_MODULE_H +#define EIGEN_SVD_MODULE_H + +#include "QR" +#include "Householder" +#include "Jacobi" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SVD_Module SVD module + * + * + * + * This module provides SVD decomposition for matrices (both real and complex). + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. + * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. 
+ * These decompositions are accessible via the respective classes and following MatrixBase methods: + * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/RealSvd2x2.h" +#include "src/SVD/UpperBidiagonalization.h" +#include "src/SVD/SVDBase.h" +#include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" +#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) +#include "src/misc/lapacke.h" +#include "src/SVD/JacobiSVD_LAPACKE.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SVD_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/Eigen/Sparse b/thirdparty/eigen/Eigen/Sparse new file mode 100644 index 000000000..a2ef7a665 --- /dev/null +++ b/thirdparty/eigen/Eigen/Sparse @@ -0,0 +1,34 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SPARSE_MODULE_H +#define EIGEN_SPARSE_MODULE_H + +/** \defgroup Sparse_Module Sparse meta-module + * + * Meta-module including all related modules: + * - \ref SparseCore_Module + * - \ref OrderingMethods_Module + * - \ref SparseCholesky_Module + * - \ref SparseLU_Module + * - \ref SparseQR_Module + * - \ref IterativeLinearSolvers_Module + * + \code + #include + \endcode + */ + +#include "SparseCore" +#include "OrderingMethods" +#include "SparseCholesky" +#include "SparseLU" +#include "SparseQR" +#include "IterativeLinearSolvers" + +#endif // EIGEN_SPARSE_MODULE_H + diff --git a/thirdparty/eigen/Eigen/SparseCholesky b/thirdparty/eigen/Eigen/SparseCholesky new file mode 100644 index 000000000..b6a320c40 --- /dev/null +++ b/thirdparty/eigen/Eigen/SparseCholesky @@ -0,0 +1,45 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECHOLESKY_MODULE_H +#define EIGEN_SPARSECHOLESKY_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup SparseCholesky_Module SparseCholesky module + * + * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are accessible via the following classes: + * - SimplicialLLt, + * - SimplicialLDLt + * + * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module. 
+ * + * \code + * #include + * \endcode + */ + +#ifdef EIGEN_MPL2_ONLY +#error The SparseCholesky module has nothing to offer in MPL2 only mode +#endif + +#include "src/SparseCholesky/SimplicialCholesky.h" + +#ifndef EIGEN_MPL2_ONLY +#include "src/SparseCholesky/SimplicialCholesky_impl.h" +#endif + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECHOLESKY_MODULE_H diff --git a/thirdparty/eigen/Eigen/SparseCore b/thirdparty/eigen/Eigen/SparseCore new file mode 100644 index 000000000..76966c4c4 --- /dev/null +++ b/thirdparty/eigen/Eigen/SparseCore @@ -0,0 +1,69 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECORE_MODULE_H +#define EIGEN_SPARSECORE_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include +#include +#include +#include +#include + +/** + * \defgroup SparseCore_Module SparseCore module + * + * This module provides a sparse matrix representation, and basic associated matrix manipulations + * and operations. + * + * See the \ref TutorialSparse "Sparse tutorial" + * + * \code + * #include + * \endcode + * + * This module depends on: Core. 
+ */ + +#include "src/SparseCore/SparseUtil.h" +#include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" +#include "src/SparseCore/CompressedStorage.h" +#include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseCompressedBase.h" +#include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/SparseMap.h" +#include "src/SparseCore/MappedSparseMatrix.h" +#include "src/SparseCore/SparseVector.h" +#include "src/SparseCore/SparseRef.h" +#include "src/SparseCore/SparseCwiseUnaryOp.h" +#include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" +#include "src/SparseCore/SparseDot.h" +#include "src/SparseCore/SparseRedux.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" +#include "src/SparseCore/ConservativeSparseSparseProduct.h" +#include "src/SparseCore/SparseSparseProductWithPruning.h" +#include "src/SparseCore/SparseProduct.h" +#include "src/SparseCore/SparseDenseProduct.h" +#include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" +#include "src/SparseCore/TriangularSolver.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECORE_MODULE_H + diff --git a/thirdparty/eigen/Eigen/SparseLU b/thirdparty/eigen/Eigen/SparseLU new file mode 100644 index 000000000..38b38b531 --- /dev/null +++ b/thirdparty/eigen/Eigen/SparseLU @@ -0,0 +1,46 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + +/** + * \defgroup SparseLU_Module SparseLU module + * This module defines a supernodal factorization of general sparse matrices. + * The code is fully optimized for supernode-panel updates with specialized kernels. + * Please, see the documentation of the SparseLU class for more details. + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU_gemm_kernel.h" + +#include "src/SparseLU/SparseLU_Structs.h" +#include "src/SparseLU/SparseLU_SupernodalMatrix.h" +#include "src/SparseLU/SparseLUImpl.h" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseLU/SparseLU_Memory.h" +#include "src/SparseLU/SparseLU_heap_relax_snode.h" +#include "src/SparseLU/SparseLU_relax_snode.h" +#include "src/SparseLU/SparseLU_pivotL.h" +#include "src/SparseLU/SparseLU_panel_dfs.h" +#include "src/SparseLU/SparseLU_kernel_bmod.h" +#include "src/SparseLU/SparseLU_panel_bmod.h" +#include "src/SparseLU/SparseLU_column_dfs.h" +#include "src/SparseLU/SparseLU_column_bmod.h" +#include "src/SparseLU/SparseLU_copy_to_ucol.h" +#include "src/SparseLU/SparseLU_pruneL.h" +#include "src/SparseLU/SparseLU_Utils.h" +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/thirdparty/eigen/Eigen/SparseQR b/thirdparty/eigen/Eigen/SparseQR new file mode 100644 index 000000000..a6f3b7f7d --- /dev/null +++ b/thirdparty/eigen/Eigen/SparseQR @@ -0,0 +1,37 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SPARSEQR_MODULE_H +#define EIGEN_SPARSEQR_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SparseQR_Module SparseQR module + * \brief Provides QR decomposition for sparse matrices + * + * This module provides a simplicial version of the left-looking Sparse QR decomposition. + * The columns of the input matrix should be reordered to limit the fill-in during the + * decomposition. Built-in methods (COLAMD, AMD) or external methods (METIS) can be used to this end. + * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list + * of built-in and external ordering methods. + * + * \code + * #include + * \endcode + * + * + */ + +#include "OrderingMethods" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseQR/SparseQR.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif diff --git a/thirdparty/eigen/Eigen/StdDeque b/thirdparty/eigen/Eigen/StdDeque new file mode 100644 index 000000000..be3a7f82b --- /dev/null +++ b/thirdparty/eigen/Eigen/StdDeque @@ -0,0 +1,27 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDDEQUE_MODULE_H +#define EIGEN_STDDEQUE_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ + +#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) 
+ +#else + +#include "src/StlSupport/StdDeque.h" + +#endif + +#endif // EIGEN_STDDEQUE_MODULE_H diff --git a/thirdparty/eigen/Eigen/StdList b/thirdparty/eigen/Eigen/StdList new file mode 100644 index 000000000..07ba1297b --- /dev/null +++ b/thirdparty/eigen/Eigen/StdList @@ -0,0 +1,26 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDLIST_MODULE_H +#define EIGEN_STDLIST_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ + +#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) + +#else + +#include "src/StlSupport/StdList.h" + +#endif + +#endif // EIGEN_STDLIST_MODULE_H diff --git a/thirdparty/eigen/Eigen/StdVector b/thirdparty/eigen/Eigen/StdVector new file mode 100644 index 000000000..fdfc37766 --- /dev/null +++ b/thirdparty/eigen/Eigen/StdVector @@ -0,0 +1,27 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDVECTOR_MODULE_H +#define EIGEN_STDVECTOR_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 /* MSVC auto aligns in 64 bit builds */ + +#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) 
+ +#else + +#include "src/StlSupport/StdVector.h" + +#endif + +#endif // EIGEN_STDVECTOR_MODULE_H diff --git a/thirdparty/eigen/Eigen/SuperLUSupport b/thirdparty/eigen/Eigen/SuperLUSupport new file mode 100644 index 000000000..59312a82d --- /dev/null +++ b/thirdparty/eigen/Eigen/SuperLUSupport @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H +#define EIGEN_SUPERLUSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#ifdef EMPTY +#define EIGEN_EMPTY_WAS_ALREADY_DEFINED +#endif + +typedef int int_t; +#include +#include +#include + +// slu_util.h defines a preprocessor token named EMPTY which is really polluting, +// so we remove it in favor of a SUPERLU_EMPTY token. +// If EMPTY was already defined then we don't undef it. + +#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED) +# undef EIGEN_EMPTY_WAS_ALREADY_DEFINED +#elif defined(EMPTY) +# undef EMPTY +#endif + +#define SUPERLU_EMPTY (-1) + +namespace Eigen { struct SluMatrix; } + +/** \ingroup Support_modules + * \defgroup SuperLUSupport_Module SuperLUSupport module + * + * This module provides an interface to the SuperLU library. + * It provides the following factorization class: + * - class SuperLU: a supernodal sequential LU factorization. + * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). + * + * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported. + * + * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. 
+ * + * \code + * #include + * \endcode + * + * In order to use this module, the superlu headers must be accessible from the include paths, and your binary must be linked to the superlu library and its dependencies. + * The dependencies depend on how superlu has been compiled. + * For a cmake based project, you can use our FindSuperLU.cmake module to help you in this task. + * + */ + +#include "src/SuperLUSupport/SuperLUSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/thirdparty/eigen/Eigen/UmfPackSupport b/thirdparty/eigen/Eigen/UmfPackSupport new file mode 100644 index 000000000..00eec8087 --- /dev/null +++ b/thirdparty/eigen/Eigen/UmfPackSupport @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H +#define EIGEN_UMFPACKSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + +/** \ingroup Support_modules + * \defgroup UmfPackSupport_Module UmfPackSupport module + * + * This module provides an interface to the UmfPack library which is part of the suitesparse package. + * It provides the following factorization class: + * - class UmfPackLU: a multifrontal sequential LU factorization. + * + * \code + * #include + * \endcode + * + * In order to use this module, the umfpack headers must be accessible from the include paths, and your binary must be linked to the umfpack library and its dependencies. + * The dependencies depend on how umfpack has been compiled. + * For a cmake based project, you can use our FindUmfPack.cmake module to help you in this task. 
+ * + */ + +#include "src/UmfPackSupport/UmfPackSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_UMFPACKSUPPORT_MODULE_H diff --git a/thirdparty/eigen/Eigen/src/Cholesky/LDLT.h b/thirdparty/eigen/Eigen/src/Cholesky/LDLT.h new file mode 100644 index 000000000..fcee7b2e3 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Cholesky/LDLT.h @@ -0,0 +1,669 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// Copyright (C) 2009 Keir Mierle +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2011 Timothy E. Holy +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LDLT_H +#define EIGEN_LDLT_H + +namespace Eigen { + +namespace internal { + template struct LDLT_Traits; + + // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef + enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite }; +} + +/** \ingroup Cholesky_Module + * + * \class LDLT + * + * \brief Robust Cholesky decomposition of a matrix with pivoting + * + * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decomposition: Lower (default) or Upper. + * The other triangular part won't be read. + * + * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite + * matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L + * is lower triangular with a unit diagonal and D is a diagonal matrix. + * + * The decomposition uses pivoting to ensure stability, so that L will have + * zeros in the bottom right rank(A) - n submatrix. Avoiding the square root + * on D also stabilizes the computation. 
+ * + * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky + * decomposition to determine whether a system of equations has a solution. + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT + */ +template class LDLT +{ + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + UpLo = _UpLo + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; + typedef Matrix TmpMatrixType; + + typedef Transpositions TranspositionType; + typedef PermutationMatrix PermutationType; + + typedef internal::LDLT_Traits Traits; + + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LDLT::compute(const MatrixType&). + */ + LDLT() + : m_matrix(), + m_transpositions(), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LDLT() + */ + explicit LDLT(Index size) + : m_matrix(size, size), + m_transpositions(size), + m_temporary(size), + m_sign(internal::ZeroSign), + m_isInitialized(false) + {} + + /** \brief Constructor with decomposition + * + * This calculates the decomposition for the input \a matrix. 
+ * + * \sa LDLT(Index size) + */ + template + explicit LDLT(const EigenBase& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa LDLT(const EigenBase&) + */ + template + explicit LDLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** Clear any existing decomposition + * \sa rankUpdate(w,sigma) + */ + void setZero() + { + m_isInitialized = false; + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the permutation matrix P as a transposition sequence. 
+ */ + inline const TranspositionType& transpositionsP() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_transpositions; + } + + /** \returns the coefficients of the diagonal matrix D */ + inline Diagonal vectorD() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix.diagonal(); + } + + /** \returns true if the matrix is positive (semidefinite) */ + inline bool isPositive() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; + } + + /** \returns true if the matrix is negative (semidefinite) */ + inline bool isNegative(void) const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign; + } + + /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * This function also supports in-place solves using the syntax x = decompositionObject.solve(x) . + * + * \note_about_checking_solutions + * + * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$ + * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, + * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then + * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the + * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function + * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular. 
+ * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() + */ + template + inline const Solve + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows()==b.rows() + && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); + return Solve(*this, b.derived()); + } + + template + bool solveInPlace(MatrixBase &bAndX) const; + + template + LDLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the LDLT decomposition. + */ + RealScalar rcond() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + template + LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha=1); + + /** \returns the internal LDLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLDLT() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LDLT& adjoint() const { return *this; }; + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. 
+ */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_info; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** \internal + * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. + * The strict upper part is used during the decomposition, the strict lower + * part correspond to the coefficients of L (its diagonal is equal to 1 and + * is not stored), and the diagonal entries correspond to D. + */ + MatrixType m_matrix; + RealScalar m_l1_norm; + TranspositionType m_transpositions; + TmpMatrixType m_temporary; + internal::SignMatrix m_sign; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template struct ldlt_inplace; + +template<> struct ldlt_inplace +{ + template + static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) + { + using std::abs; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename TranspositionType::StorageIndex IndexType; + eigen_assert(mat.rows()==mat.cols()); + const Index size = mat.rows(); + bool found_zero_pivot = false; + bool ret = true; + + if (size <= 1) + { + transpositions.setIdentity(); + if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < static_cast(0)) sign = NegativeSemiDef; + else sign = ZeroSign; + return true; + } + + for (Index k = 0; k < size; ++k) + { + // Find largest diagonal element + Index index_of_biggest_in_corner; + mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + index_of_biggest_in_corner += k; + + transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner); + if(k != 
index_of_biggest_in_corner) + { + // apply the transposition while taking care to consider only + // the lower triangular part + Index s = size-index_of_biggest_in_corner-1; // trailing size after the biggest element + mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); + mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); + std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner)); + for(Index i=k+1;i::IsComplex) + mat.coeffRef(index_of_biggest_in_corner,k) = numext::conj(mat.coeff(index_of_biggest_in_corner,k)); + } + + // partition the matrix: + // A00 | - | - + // lu = A10 | A11 | - + // A20 | A21 | A22 + Index rs = size - k - 1; + Block A21(mat,k+1,k,rs,1); + Block A10(mat,k,0,1,k); + Block A20(mat,k+1,0,rs,k); + + if(k>0) + { + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); + mat.coeffRef(k,k) -= (A10 * temp.head(k)).value(); + if(rs>0) + A21.noalias() -= A20 * temp.head(k); + } + + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, since LDLT is not rank-revealing + // we should only make sure that we do not introduce INF or NaN values. + // Remark that LAPACK also uses 0 as the cutoff value. + RealScalar realAkk = numext::real(mat.coeffRef(k,k)); + bool pivot_is_valid = (abs(realAkk) > RealScalar(0)); + + if(k==0 && !pivot_is_valid) + { + // The entire diagonal is zero, there is nothing more to do + // except filling the transpositions, and checking whether the matrix is zero. 
+ sign = ZeroSign; + for(Index j = 0; j0) && pivot_is_valid) + A21 /= realAkk; + + if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed + else if(!pivot_is_valid) found_zero_pivot = true; + + if (sign == PositiveSemiDef) { + if (realAkk < static_cast(0)) sign = Indefinite; + } else if (sign == NegativeSemiDef) { + if (realAkk > static_cast(0)) sign = Indefinite; + } else if (sign == ZeroSign) { + if (realAkk > static_cast(0)) sign = PositiveSemiDef; + else if (realAkk < static_cast(0)) sign = NegativeSemiDef; + } + } + + return ret; + } + + // Reference for the algorithm: Davis and Hager, "Multiple Rank + // Modifications of a Sparse Cholesky Factorization" (Algorithm 1) + // Trivial rearrangements of their computations (Timothy E. Holy) + // allow their algorithm to work for rank-1 updates even if the + // original matrix is not of full rank. + // Here only rank-1 updates are implemented, to reduce the + // requirement for intermediate storage and improve accuracy + template + static bool updateInPlace(MatrixType& mat, MatrixBase& w, const typename MatrixType::RealScalar& sigma=1) + { + using numext::isfinite; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + const Index size = mat.rows(); + eigen_assert(mat.cols() == size && w.size()==size); + + RealScalar alpha = 1; + + // Apply the update + for (Index j = 0; j < size; j++) + { + // Check for termination due to an original decomposition of low-rank + if (!(isfinite)(alpha)) + break; + + // Update the diagonal terms + RealScalar dj = numext::real(mat.coeff(j,j)); + Scalar wj = w.coeff(j); + RealScalar swj2 = sigma*numext::abs2(wj); + RealScalar gamma = dj*alpha + swj2; + + mat.coeffRef(j,j) += swj2/alpha; + alpha += swj2/dj; + + + // Update the terms of L + Index rs = size-j-1; + w.tail(rs) -= wj * mat.col(j).tail(rs); + if(gamma != 0) + mat.col(j).tail(rs) += (sigma*numext::conj(wj)/gamma)*w.tail(rs); + } + return true; + } + + template 
+ static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1) + { + // Apply the permutation to the input w + tmp = transpositions * w; + + return ldlt_inplace::updateInPlace(mat,tmp,sigma); + } +}; + +template<> struct ldlt_inplace +{ + template + static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) + { + Transpose matt(mat); + return ldlt_inplace::unblocked(matt, transpositions, temp, sign); + } + + template + static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1) + { + Transpose matt(mat); + return ldlt_inplace::update(matt, transpositions, tmp, w.conjugate(), sigma); + } +}; + +template struct LDLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } +}; + +template struct LDLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } +}; + +} // end namespace internal + +/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix + */ +template +template +LDLT& LDLT::compute(const EigenBase& a) +{ + check_template_parameters(); + + eigen_assert(a.rows()==a.cols()); + const Index size = a.rows(); + + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. 
+ m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (_UpLo == Lower) + abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) + m_l1_norm = abs_col_sum; + } + + m_transpositions.resize(size); + m_isInitialized = false; + m_temporary.resize(size); + m_sign = internal::ZeroSign; + + m_info = internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue; + + m_isInitialized = true; + return *this; +} + +/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T. + * \param w a vector to be incorporated into the decomposition. + * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column vectors. Optional; default value is +1. + * \sa setZero() + */ +template +template +LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) +{ + typedef typename TranspositionType::StorageIndex IndexType; + const Index size = w.rows(); + if (m_isInitialized) + { + eigen_assert(m_matrix.rows()==size); + } + else + { + m_matrix.resize(size,size); + m_matrix.setZero(); + m_transpositions.resize(size); + for (Index i = 0; i < size; i++) + m_transpositions.coeffRef(i) = IndexType(i); + m_temporary.resize(size); + m_sign = sigma>=0 ? 
internal::PositiveSemiDef : internal::NegativeSemiDef; + m_isInitialized = true; + } + + internal::ldlt_inplace::update(m_matrix, m_transpositions, m_temporary, w, sigma); + + return *this; +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and leads to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vecD.size(); ++i) + { + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } + + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); + + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; +} +#endif + +/** \internal use x = ldlt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. + * + * This version avoids a copy when the right hand side matrix b is not + * needed anymore. 
+ * + * \sa LDLT::solve(), MatrixBase::ldlt() + */ +template +template +bool LDLT::solveInPlace(MatrixBase &bAndX) const +{ + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows() == bAndX.rows()); + + bAndX = this->solve(bAndX); + + return true; +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^T L D L^* P. + * This function is provided for debug purpose. */ +template +MatrixType LDLT::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LDLT is not initialized."); + const Index size = m_matrix.rows(); + MatrixType res(size,size); + + // P + res.setIdentity(); + res = transpositionsP() * res; + // L^* P + res = matrixU() * res; + // D(L^*P) + res = vectorD().real().asDiagonal() * res; + // L(DL^*P) + res = matrixL() * res; + // P^T (LDL^*P) + res = transpositionsP().transpose() * res; + + return res; +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa MatrixBase::ldlt() + */ +template +inline const LDLT::PlainObject, UpLo> +SelfAdjointView::ldlt() const +{ + return LDLT(m_matrix); +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa SelfAdjointView::ldlt() + */ +template +inline const LDLT::PlainObject> +MatrixBase::ldlt() const +{ + return LDLT(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_LDLT_H diff --git a/thirdparty/eigen/Eigen/src/Cholesky/LLT.h b/thirdparty/eigen/Eigen/src/Cholesky/LLT.h new file mode 100644 index 000000000..87ca8d423 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Cholesky/LLT.h @@ -0,0 +1,534 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LLT_H +#define EIGEN_LLT_H + +namespace Eigen { + +namespace internal{ +template struct LLT_Traits; +} + +/** \ingroup Cholesky_Module + * + * \class LLT + * + * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features + * + * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * The other triangular part won't be read. + * + * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite + * matrix A such that A = LL^* = U^*U, where L is lower triangular. + * + * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like D^*D x = b, + * for that purpose, we recommend the Cholesky decomposition without square root which is more stable + * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other + * situations like generalised eigen problems with hermitian matrices. + * + * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive definite matrices, + * use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine whether a system of equations + * has a solution. + * + * Example: \include LLT_example.cpp + * Output: \verbinclude LLT_example.out + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT + */ + /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) + * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, + * the strict lower part does not have to store correct values. 
+ */ +template class LLT +{ + public: + typedef _MatrixType MatrixType; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; + + enum { + PacketSize = internal::packet_traits::size, + AlignmentMask = int(PacketSize)-1, + UpLo = _UpLo + }; + + typedef internal::LLT_Traits Traits; + + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LLT::compute(const MatrixType&). + */ + LLT() : m_matrix(), m_isInitialized(false) {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LLT() + */ + explicit LLT(Index size) : m_matrix(size, size), + m_isInitialized(false) {} + + template + explicit LLT(const EigenBase& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when + * \c MatrixType is a Eigen::Ref. 
+ * + * \sa LLT(const EigenBase&) + */ + template + explicit LLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_isInitialized(false) + { + compute(matrix.derived()); + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * Since this LLT class assumes anyway that the matrix A is invertible, the solution + * theoretically exists and is unique regardless of b. + * + * Example: \include LLT_solve.cpp + * Output: \verbinclude LLT_solve.out + * + * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() + */ + template + inline const Solve + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows()==b.rows() + && "LLT::solve(): invalid number of rows of the right hand side matrix b"); + return Solve(*this, b.derived()); + } + + template + void solveInPlace(MatrixBase &bAndX) const; + + template + LLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the Cholesky decomposition. 
+ */ + RealScalar rcond() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative"); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + /** \returns the LLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLLT() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix.appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_info; + } + + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LLT& adjoint() const { return *this; }; + + inline Index rows() const { return m_matrix.rows(); } + inline Index cols() const { return m_matrix.cols(); } + + template + LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif + + protected: + + static void check_template_parameters() + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); + } + + /** \internal + * Used to compute and store L + * The strict upper part is not used and even not initialized. 
+ */ + MatrixType m_matrix; + RealScalar m_l1_norm; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template struct llt_inplace; + +template +static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) +{ + using std::sqrt; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::ColXpr ColXpr; + typedef typename internal::remove_all::type ColXprCleaned; + typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; + typedef Matrix TempVectorType; + typedef typename TempVectorType::SegmentReturnType TempVecSegment; + + Index n = mat.cols(); + eigen_assert(mat.rows()==n && vec.size()==n); + + TempVectorType temp; + + if(sigma>0) + { + // This version is based on Givens rotations. + // It is faster than the other one below, but only works for updates, + // i.e., for sigma > 0 + temp = sqrt(sigma) * vec; + + for(Index i=0; i g; + g.makeGivens(mat(i,i), -temp(i), &mat(i,i)); + + Index rs = n-i-1; + if(rs>0) + { + ColXprSegment x(mat.col(i).tail(rs)); + TempVecSegment y(temp.tail(rs)); + apply_rotation_in_the_plane(x, y, g); + } + } + } + else + { + temp = vec; + RealScalar beta = 1; + for(Index j=0; j struct llt_inplace +{ + typedef typename NumTraits::Real RealScalar; + template + static Index unblocked(MatrixType& mat) + { + using std::sqrt; + + eigen_assert(mat.rows()==mat.cols()); + const Index size = mat.rows(); + for(Index k = 0; k < size; ++k) + { + Index rs = size-k-1; // remaining size + + Block A21(mat,k+1,k,rs,1); + Block A10(mat,k,0,1,k); + Block A20(mat,k+1,0,rs,k); + + RealScalar x = numext::real(mat.coeff(k,k)); + if (k>0) x -= A10.squaredNorm(); + if (x<=RealScalar(0)) + return k; + mat.coeffRef(k,k) = x = sqrt(x); + if (k>0 && rs>0) A21.noalias() -= A20 * A10.adjoint(); + if (rs>0) A21 /= x; + } + return -1; + } + + template + static Index blocked(MatrixType& m) + { + 
eigen_assert(m.rows()==m.cols()); + Index size = m.rows(); + if(size<32) + return unblocked(m); + + Index blockSize = size/8; + blockSize = (blockSize/16)*16; + blockSize = (std::min)((std::max)(blockSize,Index(8)), Index(128)); + + for (Index k=0; k A11(m,k, k, bs,bs); + Block A21(m,k+bs,k, rs,bs); + Block A22(m,k+bs,k+bs,rs,rs); + + Index ret; + if((ret=unblocked(A11))>=0) return k+ret; + if(rs>0) A11.adjoint().template triangularView().template solveInPlace(A21); + if(rs>0) A22.template selfadjointView().rankUpdate(A21,typename NumTraits::Literal(-1)); // bottleneck + } + return -1; + } + + template + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + { + return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); + } +}; + +template struct llt_inplace +{ + typedef typename NumTraits::Real RealScalar; + + template + static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat) + { + Transpose matt(mat); + return llt_inplace::unblocked(matt); + } + template + static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat) + { + Transpose matt(mat); + return llt_inplace::blocked(matt); + } + template + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + { + Transpose matt(mat); + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); + } +}; + +template struct LLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } + static bool inplace_decomposition(MatrixType& m) + { return llt_inplace::blocked(m)==-1; } +}; + +template struct LLT_Traits +{ + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } + static bool 
inplace_decomposition(MatrixType& m) + { return llt_inplace::blocked(m)==-1; } +}; + +} // end namespace internal + +/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix + * + * \returns a reference to *this + * + * Example: \include TutorialLinAlgComputeTwice.cpp + * Output: \verbinclude TutorialLinAlgComputeTwice.out + */ +template +template +LLT& LLT::compute(const EigenBase& a) +{ + check_template_parameters(); + + eigen_assert(a.rows()==a.cols()); + const Index size = a.rows(); + m_matrix.resize(size, size); + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. + m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (_UpLo == Lower) + abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) + m_l1_norm = abs_col_sum; + } + + m_isInitialized = true; + bool ok = Traits::inplace_decomposition(m_matrix); + m_info = ok ? Success : NumericalIssue; + + return *this; +} + +/** Performs a rank one update (or dowdate) of the current decomposition. + * If A = LL^* before the rank one update, + * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector + * of same dimension. 
+ */ +template +template +LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, const RealScalar& sigma) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType); + eigen_assert(v.size()==m_matrix.cols()); + eigen_assert(m_isInitialized); + if(internal::llt_inplace::rankUpdate(m_matrix,v,sigma)>=0) + m_info = NumericalIssue; + else + m_info = Success; + + return *this; +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + dst = rhs; + solveInPlace(dst); +} +#endif + +/** \internal use x = llt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * This version avoids a copy when the right hand side matrix b is not needed anymore. + * + * \sa LLT::solve(), MatrixBase::llt() + */ +template +template +void LLT::solveInPlace(MatrixBase &bAndX) const +{ + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows()==bAndX.rows()); + matrixL().solveInPlace(bAndX); + matrixU().solveInPlace(bAndX); +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: L L^*. + * This function is provided for debug purpose. 
*/ +template +MatrixType LLT::reconstructedMatrix() const +{ + eigen_assert(m_isInitialized && "LLT is not initialized."); + return matrixL() * matrixL().adjoint().toDenseMatrix(); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() + */ +template +inline const LLT::PlainObject> +MatrixBase::llt() const +{ + return LLT(derived()); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() + */ +template +inline const LLT::PlainObject, UpLo> +SelfAdjointView::llt() const +{ + return LLT(m_matrix); +} + +} // end namespace Eigen + +#endif // EIGEN_LLT_H diff --git a/thirdparty/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h b/thirdparty/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h new file mode 100644 index 000000000..bc6489e69 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to LAPACKe + * LLt decomposition based on LAPACKE_?potrf function. + ******************************************************************************** +*/ + +#ifndef EIGEN_LLT_LAPACKE_H +#define EIGEN_LLT_LAPACKE_H + +namespace Eigen { + +namespace internal { + +template struct lapacke_llt; + +#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \ +template<> struct lapacke_llt \ +{ \ + template \ + static inline Index potrf(MatrixType& m, char uplo) \ + { \ + lapack_int matrix_order; \ + lapack_int size, lda, info, StorageOrder; \ + EIGTYPE* a; \ + eigen_assert(m.rows()==m.cols()); \ + /* Set up parameters for ?potrf */ \ + size = convert_index(m.rows()); \ + StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + a = &(m.coeffRef(0,0)); \ + lda = convert_index(m.outerStride()); \ +\ + info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \ + info = (info==0) ? -1 : info>0 ? 
info-1 : size; \ + return info; \ + } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static Index blocked(MatrixType& m) \ + { \ + return lapacke_llt::potrf(m, 'L'); \ + } \ + template \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ +}; \ +template<> struct llt_inplace \ +{ \ + template \ + static Index blocked(MatrixType& m) \ + { \ + return lapacke_llt::potrf(m, 'U'); \ + } \ + template \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + { \ + Transpose matt(mat); \ + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ + } \ +}; + +EIGEN_LAPACKE_LLT(double, double, d) +EIGEN_LAPACKE_LLT(float, float, s) +EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LLT_LAPACKE_H diff --git a/thirdparty/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/thirdparty/eigen/Eigen/src/CholmodSupport/CholmodSupport.h new file mode 100644 index 000000000..571972023 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -0,0 +1,639 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CHOLMODSUPPORT_H +#define EIGEN_CHOLMODSUPPORT_H + +namespace Eigen { + +namespace internal { + +template struct cholmod_configure_matrix; + +template<> struct cholmod_configure_matrix { + template + static void run(CholmodType& mat) { + mat.xtype = CHOLMOD_REAL; + mat.dtype = CHOLMOD_DOUBLE; + } +}; + +template<> struct cholmod_configure_matrix > { + template + static void run(CholmodType& mat) { + mat.xtype = CHOLMOD_COMPLEX; + mat.dtype = CHOLMOD_DOUBLE; + } +}; + +// Other scalar types are not yet supported by Cholmod +// template<> struct cholmod_configure_matrix { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_REAL; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; +// +// template<> struct cholmod_configure_matrix > { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_COMPLEX; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; + +} // namespace internal + +/** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. + * Note that the data are shared. 
+ */ +template +cholmod_sparse viewAsCholmod(Ref > mat) +{ + cholmod_sparse res; + res.nzmax = mat.nonZeros(); + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.p = mat.outerIndexPtr(); + res.i = mat.innerIndexPtr(); + res.x = mat.valuePtr(); + res.z = 0; + res.sorted = 1; + if(mat.isCompressed()) + { + res.packed = 1; + res.nz = 0; + } + else + { + res.packed = 0; + res.nz = mat.innerNonZeroPtr(); + } + + res.dtype = 0; + res.stype = -1; + + if (internal::is_same<_StorageIndex,int>::value) + { + res.itype = CHOLMOD_INT; + } + else if (internal::is_same<_StorageIndex,long>::value) + { + res.itype = CHOLMOD_LONG; + } + else + { + eigen_assert(false && "Index type not supported yet"); + } + + // setup res.xtype + internal::cholmod_configure_matrix<_Scalar>::run(res); + + res.stype = 0; + + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. + * The data are not copied but shared. */ +template +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.matrix().const_cast_derived())); + + if(UpLo==Upper) res.stype = 1; + if(UpLo==Lower) res.stype = -1; + + return res; +} + +/** Returns a view of the Eigen \b dense matrix \a mat as Cholmod dense matrix. + * The data are not copied but shared. 
*/ +template +cholmod_dense viewAsCholmod(MatrixBase& mat) +{ + EIGEN_STATIC_ASSERT((internal::traits::Flags&RowMajorBit)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + typedef typename Derived::Scalar Scalar; + + cholmod_dense res; + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.nzmax = res.nrow * res.ncol; + res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride(); + res.x = (void*)(mat.derived().data()); + res.z = 0; + + internal::cholmod_configure_matrix::run(res); + + return res; +} + +/** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. + * The data are not copied but shared. */ +template +MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +{ + return MappedSparseMatrix + (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], + static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); +} + +enum CholmodMode { + CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt +}; + + +/** \ingroup CholmodSupport_Module + * \class CholmodBase + * \brief The base class for the direct Cholesky factorization of Cholmod + * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT + */ +template +class CholmodBase : public SparseSolverBase +{ + protected: + typedef SparseSolverBase Base; + using Base::derived; + using Base::m_isInitialized; + public: + typedef _MatrixType MatrixType; + enum { UpLo = _UpLo }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef MatrixType CholMatrixType; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; + + public: + + CholmodBase() + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = 
m_shiftOffset[1] = 0.0; + cholmod_start(&m_cholmod); + } + + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) + { + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; + cholmod_start(&m_cholmod); + compute(matrix); + } + + ~CholmodBase() + { + if(m_cholmodFactor) + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + cholmod_finish(&m_cholmod); + } + + inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } + inline StorageIndex rows() const { return internal::convert_index(m_cholmodFactor->n); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was successful, + * \c NumericalIssue if the matrix appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + /** Computes the sparse Cholesky decomposition of \a matrix */ + Derived& compute(const MatrixType& matrix) + { + analyzePattern(matrix); + factorize(matrix); + return derived(); + } + + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. + * + * This function is particularly useful when solving for several problems having the same structure. 
+ * + * \sa factorize() + */ + void analyzePattern(const MatrixType& matrix) + { + if(m_cholmodFactor) + { + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + m_cholmodFactor = 0; + } + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); + + this->m_isInitialized = true; + this->m_info = Success; + m_analysisIsOk = true; + m_factorizationIsOk = false; + } + + /** Performs a numeric decomposition of \a matrix + * + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. + * + * \sa analyzePattern() + */ + void factorize(const MatrixType& matrix) + { + eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); + + // If the factorization failed, minor is the column at which it did. On success minor == n. + this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); + m_factorizationIsOk = true; + } + + /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations. + * See the Cholmod user guide for details. */ + cholmod_common& cholmod() { return m_cholmod; } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal */ + template + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const + { + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size==b.rows()); + + // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. 
+ Ref > b_ref(b.derived()); + + cholmod_dense b_cd = viewAsCholmod(b_ref); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); + if(!x_cd) + { + this->m_info = NumericalIssue; + return; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); + cholmod_free_dense(&x_cd, &m_cholmod); + } + + /** \internal */ + template + void _solve_impl(const SparseMatrixBase &b, SparseMatrixBase &dest) const + { + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size==b.rows()); + + // note: cs stands for Cholmod Sparse + Ref > b_ref(b.const_cast_derived()); + cholmod_sparse b_cs = viewAsCholmod(b_ref); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); + if(!x_cs) + { + this->m_info = NumericalIssue; + return; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + dest.derived() = viewAsEigen(*x_cs); + cholmod_free_sparse(&x_cs, &m_cholmod); + } + #endif // EIGEN_PARSED_BY_DOXYGEN + + + /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization. + * + * During the numerical factorization, an offset term is added to the diagonal coefficients:\n + * \c d_ii = \a offset + \c d_ii + * + * The default is \a offset=0. + * + * \returns a reference to \c *this. 
+ */ + Derived& setShift(const RealScalar& offset) + { + m_shiftOffset[0] = double(offset); + return derived(); + } + + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const + { + using std::exp; + return exp(logDeterminant()); + } + + /** \returns the log determinant of the underlying matrix from the current factorization */ + Scalar logDeterminant() const + { + using std::log; + using numext::real; + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + + RealScalar logDet = 0; + Scalar *x = static_cast(m_cholmodFactor->x); + if (m_cholmodFactor->is_super) + { + // Supernodal factorization stored as a packed list of dense column-major blocks, + // as described by the following structure: + + // super[k] == index of the first column of the k-th super node + StorageIndex *super = static_cast(m_cholmodFactor->super); + // pi[k] == offset to the description of row indices + StorageIndex *pi = static_cast(m_cholmodFactor->pi); + // px[k] == offset to the respective dense block + StorageIndex *px = static_cast(m_cholmodFactor->px); + + Index nb_super_nodes = m_cholmodFactor->nsuper; + for (Index k=0; k < nb_super_nodes; ++k) + { + StorageIndex ncols = super[k + 1] - super[k]; + StorageIndex nrows = pi[k + 1] - pi[k]; + + Map, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1)); + logDet += sk.real().log().sum(); + } + } + else + { + // Simplicial factorization stored as standard CSC matrix. 
+ StorageIndex *p = static_cast(m_cholmodFactor->p); + Index size = m_cholmodFactor->n; + for (Index k=0; kis_ll) + logDet *= 2.0; + return logDet; + }; + + template + void dumpMemory(Stream& /*s*/) + {} + + protected: + mutable cholmod_common m_cholmod; + cholmod_factor* m_cholmodFactor; + double m_shiftOffset[2]; + mutable ComputationInfo m_info; + int m_factorizationIsOk; + int m_analysisIsOk; +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLLT + * \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. 
+ * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT + */ +template +class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSimplicialLLT() : Base() { init(); } + + CholmodSimplicialLLT(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodSimplicialLLT() {} + protected: + void init() + { + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + } +}; + + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLDLT + * \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical interest. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. 
+ * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT + */ +template +class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSimplicialLDLT() : Base() { init(); } + + CholmodSimplicialLDLT(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodSimplicialLDLT() {} + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSupernodalLLT + * \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization + * using the Cholmod library. + * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. 
+ * + * \sa \ref TutorialSparseSolverConcept + */ +template +class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodSupernodalLLT() : Base() { init(); } + + CholmodSupernodalLLT(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodSupernodalLLT() {} + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodDecomposition + * \brief A general Cholesky factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization + * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * This variant permits to change the underlying Cholesky method at runtime. + * On the other hand, it does not provide access to the result of the factorization. + * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization. + * + * \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. 
+ * + * \sa \ref TutorialSparseSolverConcept + */ +template +class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > +{ + typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base; + using Base::m_cholmod; + + public: + + typedef _MatrixType MatrixType; + + CholmodDecomposition() : Base() { init(); } + + CholmodDecomposition(const MatrixType& matrix) : Base() + { + init(); + this->compute(matrix); + } + + ~CholmodDecomposition() {} + + void setMode(CholmodMode mode) + { + switch(mode) + { + case CholmodAuto: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + break; + case CholmodSimplicialLLt: + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + break; + case CholmodSupernodalLLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + break; + case CholmodLDLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + break; + default: + break; + } + } + protected: + void init() + { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CHOLMODSUPPORT_H diff --git a/thirdparty/eigen/Eigen/src/Core/Array.h b/thirdparty/eigen/Eigen/src/Core/Array.h new file mode 100644 index 000000000..0d34269fd --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Array.h @@ -0,0 +1,325 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ARRAY_H +#define EIGEN_ARRAY_H + +namespace Eigen { + +namespace internal { +template +struct traits > : traits > +{ + typedef ArrayXpr XprKind; + typedef ArrayBase > XprBase; +}; +} + +/** \class Array + * \ingroup Core_Module + * + * \brief General-purpose arrays with easy API for coefficient-wise operations + * + * The %Array class is very similar to the Matrix class. It provides + * general-purpose one- and two-dimensional arrays. The difference between the + * %Array and the %Matrix class is primarily in the API: the API for the + * %Array class provides easy access to coefficient-wise operations, while the + * API for the %Matrix class provides easy access to linear-algebra + * operations. + * + * See documentation of class Matrix for detailed information on the template parameters + * storage layout. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. + * + * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy + */ +template +class Array + : public PlainObjectBase > +{ + public: + + typedef PlainObjectBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Array) + + enum { Options = _Options }; + typedef typename Base::PlainObject PlainObject; + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; + + public: + + using Base::base; + using Base::coeff; + using Base::coeffRef; + + /** + * The usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const EigenBase &other) + { + return Base::operator=(other); + } + + /** Set all the entries to \a value. 
+ * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } + + /** Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const DenseBase& other) + { + return Base::_set(other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Array& other) + { + return Base::_set(other); + } + + /** Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array() : Base() + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + // FIXME is it still needed ?? 
+ /** \internal */ + EIGEN_DEVICE_FUNC + Array(internal::constructor_without_unaligned_array_assert) + : Base(internal::constructor_without_unaligned_array_assert()) + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } +#endif + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) + : Base(std::move(other)) + { + Base::_check_template_params(); + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); + } + EIGEN_DEVICE_FUNC + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) + { + other.swap(*this); + return *this; + } +#endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) + { + Base::_check_template_params(); + this->template _init2(val0, val1); + } + #else + /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Array(const Scalar *data); + /** Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Array() instead. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(Index dim); + /** constructs an initialized 1x1 Array with the given coefficient */ + Array(const Scalar& value); + /** constructs an uninitialized array with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size arrays. For fixed-size arrays, + * it is redundant to pass these parameters, so one should use the default constructor + * Array() instead. 
*/ + Array(Index rows, Index cols); + /** constructs an initialized 2D vector with given coefficients */ + Array(const Scalar& val0, const Scalar& val1); + #endif + + /** constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + } + /** constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + m_storage.data()[3] = val3; + } + + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const Array& other) + : Base(other) + { } + + /** \sa MatrixBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const EigenBase &other) + : Base(other.derived()) + { } + + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } + + #ifdef EIGEN_ARRAY_PLUGIN + #include EIGEN_ARRAY_PLUGIN + #endif + + private: + + template + friend struct internal::matrix_swap_impl; +}; + +/** \defgroup arraytypedefs Global array typedefs + * \ingroup Core_Module + * + * Eigen defines several typedef shortcuts for most common 1D and 2D array types. 
+ * + * The general patterns are the following: + * + * \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size matrix of floats. + * + * There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is + * a fixed-size 1D array of 4 complex floats. + * + * \sa class Array + */ + +#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##SizeSuffix##SizeSuffix##TypeSuffix; \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##Size##X##TypeSuffix; \ +/** \ingroup arraytypedefs */ \ +typedef Array Array##X##Size##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \ +EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_ARRAY_TYPEDEFS + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_LARGE + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ 
+using Eigen::Matrix##SizeSuffix##TypeSuffix; \ +using Eigen::Vector##SizeSuffix##TypeSuffix; \ +using Eigen::RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ + +#define EIGEN_USING_ARRAY_TYPEDEFS \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \ +EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd) + +} // end namespace Eigen + +#endif // EIGEN_ARRAY_H diff --git a/thirdparty/eigen/Eigen/src/Core/ArrayBase.h b/thirdparty/eigen/Eigen/src/Core/ArrayBase.h new file mode 100644 index 000000000..f0232f65e --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/ArrayBase.h @@ -0,0 +1,226 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYBASE_H +#define EIGEN_ARRAYBASE_H + +namespace Eigen { + +template class MatrixWrapper; + +/** \class ArrayBase + * \ingroup Core_Module + * + * \brief Base class for all 1D and 2D arrays, and related expressions + * + * An array is similar to a dense vector or matrix. While matrices are mathematical + * objects with well defined linear algebra operators, an array is just a collection + * of scalar values arranged in a one or two dimensional fashion. As the main consequence, + * all operations applied to an array are performed coefficient wise.
Furthermore, + * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient + * constructors allowing to easily write generic code working for both scalar values + * and arrays. + * + * This class is the base that is inherited by all array expression types. + * + * \tparam Derived is the derived type, e.g., an array or an expression type. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. + * + * \sa class MatrixBase, \ref TopicClassHierarchy + */ +template class ArrayBase + : public DenseBase +{ + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** The base class for a given storage type. */ + typedef ArrayBase StorageBaseType; + + typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::operator=; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Base::PlainObject PlainObject; + + /** \internal Represents a matrix with all coefficients equal to one another*/ + 
typedef CwiseNullaryOp,PlainObject> ConstantReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" +# include "../plugins/MatrixCwiseUnaryOps.h" +# include "../plugins/ArrayCwiseUnaryOps.h" +# include "../plugins/CommonCwiseBinaryOps.h" +# include "../plugins/MatrixCwiseBinaryOps.h" +# include "../plugins/ArrayCwiseBinaryOps.h" +# ifdef EIGEN_ARRAYBASE_PLUGIN +# include EIGEN_ARRAYBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const ArrayBase& other) + { + internal::call_assignment(derived(), other.derived()); + return derived(); + } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const Scalar& scalar); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const Scalar& scalar); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const ArrayBase& other); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const ArrayBase& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const ArrayBase& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const ArrayBase& other); + + public: + EIGEN_DEVICE_FUNC + ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC + const ArrayBase& array() const { return *this; } + + /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array + * \sa MatrixBase::array() */ + 
EIGEN_DEVICE_FUNC + MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC + const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } + +// template +// inline void evalTo(Dest& dst) const { dst = matrix(); } + + protected: + EIGEN_DEVICE_FUNC + ArrayBase() : Base() {} + + private: + explicit ArrayBase(Index); + ArrayBase(Index,Index); + template explicit ArrayBase(const ArrayBase&); + protected: + // mixing arrays and matrices is not legal + template Derived& operator+=(const MatrixBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} + // mixing arrays and matrices is not legal + template Derived& operator-=(const MatrixBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator-=(const ArrayBase &other) +{ + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator+=(const ArrayBase& other) +{ + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other coefficient wise. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator*=(const ArrayBase& other) +{ + call_assignment(derived(), other.derived(), internal::mul_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this / \a other coefficient wise. 
+ * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +ArrayBase::operator/=(const ArrayBase& other) +{ + call_assignment(derived(), other.derived(), internal::div_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_ARRAYBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/ArrayWrapper.h b/thirdparty/eigen/Eigen/src/Core/ArrayWrapper.h new file mode 100644 index 000000000..a04521a16 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/ArrayWrapper.h @@ -0,0 +1,207 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYWRAPPER_H +#define EIGEN_ARRAYWRAPPER_H + +namespace Eigen { + +/** \class ArrayWrapper + * \ingroup Core_Module + * + * \brief Expression of a mathematical vector or matrix as an array object + * + * This class is the return type of MatrixBase::array(), and most of the time + * this is the only way it is used.
+ * + * \sa MatrixBase::array(), class MatrixWrapper + */ + +namespace internal { +template +struct traits > + : public traits::type > +{ + typedef ArrayXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; +}; +} + +template +class ArrayWrapper : public ArrayBase > +{ + public: + typedef ArrayBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all::type NestedExpression; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + typedef typename internal::ref_selector::non_const_type NestedExpressionType; + + using Base::coeffRef; + + EIGEN_DEVICE_FUNC + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC + inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC + inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC + inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return m_expression.data(); } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_expression.coeffRef(rowId, colId); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + return m_expression.coeffRef(index); + } + + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const { dst = m_expression; } + + const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC + nestedExpression() const + { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa 
DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC + void resize(Index newSize) { m_expression.resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } + + protected: + NestedExpressionType m_expression; +}; + +/** \class MatrixWrapper + * \ingroup Core_Module + * + * \brief Expression of an array as a mathematical vector or matrix + * + * This class is the return type of ArrayBase::matrix(), and most of the time + * this is the only way it is used. + * + * \sa ArrayBase::matrix(), class ArrayWrapper + */ + +namespace internal { +template +struct traits > + : public traits::type > +{ + typedef MatrixXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits::type >::Flags, + Flags = Flags0 & ~NestByRefBit + }; +}; +} + +template +class MatrixWrapper : public MatrixBase > +{ + public: + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all::type NestedExpression; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + typedef typename internal::ref_selector::non_const_type NestedExpressionType; + + using Base::coeffRef; + + EIGEN_DEVICE_FUNC + explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC + inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC + inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC + inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC + inline const Scalar* data()
const { return m_expression.data(); } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_expression.derived().coeffRef(rowId, colId); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + return m_expression.coeffRef(index); + } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const + { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC + void resize(Index newSize) { m_expression.resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } + + protected: + NestedExpressionType m_expression; +}; + +} // end namespace Eigen + +#endif // EIGEN_ARRAYWRAPPER_H diff --git a/thirdparty/eigen/Eigen/src/Core/Assign.h b/thirdparty/eigen/Eigen/src/Core/Assign.h new file mode 100644 index 000000000..53806ba33 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Assign.h @@ -0,0 +1,90 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007 Michael Olbrich +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGN_H +#define EIGEN_ASSIGN_H + +namespace Eigen { + +template +template +EIGEN_STRONG_INLINE Derived& DenseBase + ::lazyAssign(const DenseBase& other) +{ + enum{ + SameType = internal::is_same::value + }; + + EIGEN_STATIC_ASSERT_LVALUE(Derived) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) + EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + eigen_assert(rows() == other.rows() && cols() == other.cols()); + internal::call_assignment_no_alias(derived(),other.derived()); + + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) +{ + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) +{ + other.derived().evalTo(derived()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_H diff --git a/thirdparty/eigen/Eigen/src/Core/AssignEvaluator.h b/thirdparty/eigen/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 
000000000..0d0189657 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,913 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +namespace Eigen { + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits + +template +struct copy_using_evaluator_traits +{ + typedef typename DstEvaluator::XprType Dst; + typedef typename Dst::Scalar DstScalar; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags + }; + +public: + enum { + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, + DstHasDirectAccess = DstFlags & DirectAccessBit, + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) + }; + +private: + enum { + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime + }; + + // TODO distinguish between linear traversal and inner-traversals + typedef typename find_best_packet::type LinearPacketType; + typedef typename find_best_packet::type InnerPacketType; + + enum { + LinearPacketSize = unpacket_traits::size, + InnerPacketSize = unpacket_traits::size + }; + +public: + enum { + LinearRequiredAlignment = unpacket_traits::alignment, + InnerRequiredAlignment = unpacket_traits::alignment + }; + +private: + enum { + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), + MightVectorize = bool(StorageOrdersAgree) + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && bool(functor_traits::PacketAccess), + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 + && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), + MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), + MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. 
*/ + MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize))) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ + }; + +public: + enum { + Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal) + : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + + typedef typename conditional::type PacketType; + +private: + enum { + ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize + : Vectorized ? InnerPacketSize + : 1, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? 
( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment))) + ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(NoUnrolling) ) +#if EIGEN_UNALIGNED_VECTORIZE + : int(Traversal) == int(SliceVectorizedTraversal) + ? ( bool(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) ) +#endif + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(LinearRequiredAlignment) + EIGEN_DEBUG_VAR(InnerRequiredAlignment) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(LinearPacketSize) + EIGEN_DEBUG_VAR(InnerPacketSize) + EIGEN_DEBUG_VAR(ActualPacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + 
+/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.assignCoeffByOuterInner(outer, inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.assignCoeffByOuterInner(outer, Index_); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) + { + kernel.assignCoeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + 
+/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.template assignPacketByOuterInner(outer, inner); + enum { NextIndex = Index + unpacket_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.template assignPacketByOuterInner(outer, Index_); + enum { NextIndex = Index_ + unpacket_traits::size }; + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +// dense_assignment_loop is based on assign_impl + +template +struct dense_assignment_loop; + +/************************ +*** Default traversal *** +************************/ + +template 
+struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) + { + for(Index outer = 0; outer < kernel.outerSize(); ++outer) { + for(Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + + +// The goal of unaligned_dense_assignment_loop is simply to factorize the handling +// of the non vectorizable beginning and ending parts + +template +struct unaligned_dense_assignment_loop +{ + // if IsAligned = true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} +}; + +template <> +struct unaligned_dense_assignment_loop +{ + // MSVC must not inline this function. If it does, it fails to optimize the + // packet access path.
+ // FIXME check which version exhibits this issue +#if EIGEN_COMP_MSVC + template + static EIGEN_DONT_INLINE void run(Kernel &kernel, + Index start, + Index end) +#else + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, + Index start, + Index end) +#endif + { + for (Index index = start; index < end; ++index) + kernel.assignCoeff(index); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment, + packetSize = unpacket_traits::size, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = packet_traits::AlignedOnScalar ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), + srcAlignment = Kernel::AssignmentTraits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 
0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + kernel.template assignPacket(index); + + unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::SizeAtCompileTime, + packetSize =unpacket_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct dense_assignment_loop +{ + typedef typename Kernel::PacketType PacketType; + enum { + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index packetSize = unpacket_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void 
run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::AssignmentTraits Traits; + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + for(Index i = 0; i < size; ++i) + kernel.assignCoeff(i); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + { + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + packetSize = unpacket_traits::size, + requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), + alignable = packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = alignable ? 
int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) + }; + const Scalar *dst_ptr = &kernel.dstEvaluator().coeffRef(0,0); + if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0) + { + // the pointer is not aligned-on scalar, so alignment is not possible + return dense_assignment_loop::run(kernel); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner); + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::InnerSizeAtCompileTime, + packetSize =unpacket_traits::size, + vectorizableSize = (size/packetSize)*packetSize }; + + for(Index outer = 0; outer < kernel.outerSize(); ++outer) + { + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } + } +}; +#endif + + +/*************************************************************************** +* Part 4 : Generic dense assignment kernel +***************************************************************************/ + +// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. 
+// It is parametrized by the two evaluators, and the actual assignment functor. +// This abstraction level permits to keep the evaluation loops as simple and as generic as possible. +// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. +template +class generic_dense_assignment_kernel +{ +protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; +public: + + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } + + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } + + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } + + /// Assign src(row,col) to dst(row,col) through the assignment functor. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) + { + m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) + { + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); + } + + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) + { + m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) + { + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? 
inner + : outer; + } + +protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor &m_functor; + // TODO find a way to avoid the need for the original expression + DstXprType& m_dstExpr; +}; + +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) +{ + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + SrcEvaluatorType srcEvaluator(src); + + // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, + // we need to resize the destination after the source evaluator has been created. + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + DstEvaluatorType dstEvaluator(dst); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 6 : Generic assignment +***************************************************************************/ + +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determines the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. 
+template struct AssignmentKind; + +// Assignment kind defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; + +template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind { typedef Dense2Dense Kind; }; + +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, + typename EnableIf = void> +struct Assignment; + + +// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. +// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes things quite complicated. +// So this intermediate function removes everything related to "assume-aliasing" such that Assignment +// does not have to bother about these annoying details. + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(const Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with "assume-aliasing" +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing::value, void*>::type = 0) +{ + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::value, void*>::type = 0) +{ + call_assignment_no_alias(dst, src, func); +} + +// by-pass "assume-aliasing" +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE +void call_assignment(NoAlias& dst, const Src& src, const Func& func) +{ + call_assignment_no_alias(dst.expression(), src, func); +} + + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) + ) && int(Dst::SizeAtCompileTime) != 1 + }; + + typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment::run(actualDst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src) +{ + call_assignment_no_alias(dst, src, internal::assign_op()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +{ + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +{ + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + +// forward declaration +template void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic Dense to Dense assignment +// Note that the last template argument "Weak" is needed to make it 
possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; + +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.evalTo(dst); + } + + // NOTE The following two functions are templated to avoid their instantiation if not needed + // This is needed because some expressions support evalTo only and/or have 'void' as scalar type. 
+ template + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.addTo(dst); + } + + template + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.subTo(dst); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_EVALUATOR_H diff --git a/thirdparty/eigen/Eigen/src/Core/Assign_MKL.h b/thirdparty/eigen/Eigen/src/Core/Assign_MKL.h new file mode 100755 index 000000000..6c2ab9264 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Assign_MKL.h @@ -0,0 +1,176 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + Copyright (C) 2015 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() + ******************************************************************************** +*/ + +#ifndef EIGEN_ASSIGN_VML_H +#define EIGEN_ASSIGN_VML_H + +namespace Eigen { + +namespace internal { + +template +class vml_assign_traits +{ + private: + enum { + DstHasDirectAccess = Dst::Flags & DirectAccessBit, + SrcHasDirectAccess = Src::Flags & DirectAccessBit, + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(Dst::Flags)&RowMajorBit ? 
int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + + MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD + }; + public: + enum { + EnableVml = MightEnableVml && LargeEnough, + Traversal = MightLinearize ? LinearTraversal : DefaultTraversal + }; +}; + +#define EIGEN_PP_EXPAND(ARG) ARG +#if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) +#define EIGEN_VMLMODE_EXPAND_LA , VML_HA +#else +#define EIGEN_VMLMODE_EXPAND_LA , VML_LA +#endif + +#define EIGEN_VMLMODE_EXPAND__ + +#define EIGEN_VMLMODE_PREFIX_LA vm +#define EIGEN_VMLMODE_PREFIX__ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if(vml_assign_traits::Traversal==LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; \ + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) 
+EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested, typename Plain> \ + struct Assignment, SrcXprNested, \ + const CwiseNullaryOp,Plain> >, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseBinaryOp, SrcXprNested, \ + const CwiseNullaryOp,Plain> > SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ + if(vml_assign_traits::Traversal==LinearTraversal) \ + { \ + VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \ + &(src.lhs().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_VML_H diff --git a/thirdparty/eigen/Eigen/src/Core/BandMatrix.h b/thirdparty/eigen/Eigen/src/Core/BandMatrix.h new file mode 100644 index 000000000..4978c9140 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/BandMatrix.h @@ -0,0 +1,353 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_BANDMATRIX_H +#define EIGEN_BANDMATRIX_H + +namespace Eigen { + +namespace internal { + +template +class BandMatrixBase : public EigenBase +{ + public: + + enum { + Flags = internal::traits::Flags, + CoeffReadCost = internal::traits::CoeffReadCost, + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + Supers = internal::traits::Supers, + Subs = internal::traits::Subs, + Options = internal::traits::Options + }; + typedef typename internal::traits::Scalar Scalar; + typedef Matrix DenseMatrixType; + typedef typename DenseMatrixType::StorageIndex StorageIndex; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef EigenBase Base; + + protected: + enum { + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) + ? 1 + Supers + Subs + : Dynamic, + SizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime,ColsAtCompileTime) + }; + + public: + + using Base::derived; + using Base::rows; + using Base::cols; + + /** \returns the number of super diagonals */ + inline Index supers() const { return derived().supers(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return derived().subs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline const CoefficientsType& coeffs() const { return derived().coeffs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline CoefficientsType& coeffs() { return derived().coeffs(); } + + /** \returns a vector expression of the \a i -th column, + * only the meaningful part is returned. + * \warning the internal storage must be column major. 
*/ + inline Block col(Index i) + { + EIGEN_STATIC_ASSERT((Options&RowMajor)==0,THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + Index start = 0; + Index len = coeffs().rows(); + if (i<=supers()) + { + start = supers()-i; + len = (std::min)(rows(),std::max(0,coeffs().rows() - (supers()-i))); + } + else if (i>=rows()-subs()) + len = std::max(0,coeffs().rows() - (i + 1 - rows() + subs())); + return Block(coeffs(), start, i, len, 1); + } + + /** \returns a vector expression of the main diagonal */ + inline Block diagonal() + { return Block(coeffs(),supers(),0,1,(std::min)(rows(),cols())); } + + /** \returns a vector expression of the main diagonal (const version) */ + inline const Block diagonal() const + { return Block(coeffs(),supers(),0,1,(std::min)(rows(),cols())); } + + template struct DiagonalIntReturnType { + enum { + ReturnOpposite = (Options&SelfAdjoint) && (((Index)>0 && Supers==0) || ((Index)<0 && Subs==0)), + Conjugate = ReturnOpposite && NumTraits::IsComplex, + ActualIndex = ReturnOpposite ? -Index : Index, + DiagonalSize = (RowsAtCompileTime==Dynamic || ColsAtCompileTime==Dynamic) + ? Dynamic + : (ActualIndex<0 + ? 
EIGEN_SIZE_MIN_PREFER_DYNAMIC(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) + : EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) + }; + typedef Block BuildType; + typedef typename internal::conditional,BuildType >, + BuildType>::type Type; + }; + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template inline typename DiagonalIntReturnType::Type diagonal() + { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template inline const typename DiagonalIntReturnType::Type diagonal() const + { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers()-N, (std::max)(0,N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline Block diagonal(Index i) + { + eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); + return Block(coeffs(), supers()-i, std::max(0,i), 1, diagonalLength(i)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline const Block diagonal(Index i) const + { + eigen_assert((i<0 && -i<=subs()) || (i>=0 && i<=supers())); + return Block(coeffs(), supers()-i, std::max(0,i), 1, diagonalLength(i)); + } + + template inline void evalTo(Dest& dst) const + { + dst.resize(rows(),cols()); + dst.setZero(); + dst.diagonal() = diagonal(); + for (Index i=1; i<=supers();++i) + dst.diagonal(i) = diagonal(i); + for (Index i=1; i<=subs();++i) + dst.diagonal(-i) = diagonal(-i); + } + + DenseMatrixType toDenseMatrix() const + { + DenseMatrixType res(rows(),cols()); + evalTo(res); + return res; + } + + protected: + + inline Index diagonalLength(Index i) const + { return i<0 ? 
(std::min)(cols(),rows()+i) : (std::min)(rows(),cols()-i); } +}; + +/** + * \class BandMatrix + * \ingroup Core_Module + * + * \brief Represents a rectangular matrix with a banded storage + * + * \tparam _Scalar Numeric type, i.e. float, double, int + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * \tparam _Supers Number of super diagonals + * \tparam _Subs Number of sub diagonals + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * The former controls \ref TopicStorageOrders "storage order", and defaults to + * column-major. The latter controls whether the matrix represents a selfadjoint + * matrix in which case either Supers or Subs has to be zero. + * + * \sa class TridiagonalMatrix + */ + +template +struct traits > +{ + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + enum { + CoeffReadCost = NumTraits::ReadCost, + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _Rows, + MaxColsAtCompileTime = _Cols, + Flags = LvalueBit, + Supers = _Supers, + Subs = _Subs, + Options = _Options, + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 
1 + Supers + Subs : Dynamic + }; + typedef Matrix CoefficientsType; +}; + +template +class BandMatrix : public BandMatrixBase > +{ + public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::CoefficientsType CoefficientsType; + + explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + : m_coeffs(1+supers+subs,cols), + m_rows(rows), m_supers(supers), m_subs(subs) + { + } + + /** \returns the number of rows */ + inline Index rows() const { return m_rows.value(); } + + /** \returns the number of columns (the column count of the coefficient storage) */ + inline Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + inline Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + inline CoefficientsType& coeffs() { return m_coeffs; } + + protected: + + CoefficientsType m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +template +class BandMatrixWrapper; + +template +struct traits > +{ + typedef typename _CoefficientsType::Scalar Scalar; + typedef typename _CoefficientsType::StorageKind StorageKind; + typedef typename _CoefficientsType::StorageIndex StorageIndex; + enum { + CoeffReadCost = internal::traits<_CoefficientsType>::CoeffReadCost, + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _Rows, + MaxColsAtCompileTime = _Cols, + Flags = LvalueBit, + Supers = _Supers, + Subs = _Subs, + Options = _Options, + DataRowsAtCompileTime = ((Supers!=Dynamic) && (Subs!=Dynamic)) ? 
1 + Supers + Subs : Dynamic + }; + typedef _CoefficientsType CoefficientsType; +}; + +template +class BandMatrixWrapper : public BandMatrixBase > +{ + public: + + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef typename internal::traits::StorageIndex StorageIndex; + + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + : m_coeffs(coeffs), + m_rows(rows), m_supers(supers), m_subs(subs) + { + EIGEN_UNUSED_VARIABLE(cols); + //internal::assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows()); + } + + /** \returns the number of columns */ + inline Index rows() const { return m_rows.value(); } + + /** \returns the number of rows */ + inline Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + inline Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + + protected: + + const CoefficientsType& m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +/** + * \class TridiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a tridiagonal matrix with a compact banded storage + * + * \tparam Scalar Numeric type, i.e. 
float, double, int + * \tparam Size Number of rows and cols, or \b Dynamic + * \tparam Options Can be 0 or \b SelfAdjoint + * + * \sa class BandMatrix + */ +template +class TridiagonalMatrix : public BandMatrix +{ + typedef BandMatrix Base; + typedef typename Base::StorageIndex StorageIndex; + public: + explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + + inline typename Base::template DiagonalIntReturnType<1>::Type super() + { return Base::template diagonal<1>(); } + inline const typename Base::template DiagonalIntReturnType<1>::Type super() const + { return Base::template diagonal<1>(); } + inline typename Base::template DiagonalIntReturnType<-1>::Type sub() + { return Base::template diagonal<-1>(); } + inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const + { return Base::template diagonal<-1>(); } + protected: +}; + + +struct BandShape {}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BANDMATRIX_H diff --git a/thirdparty/eigen/Eigen/src/Core/Block.h b/thirdparty/eigen/Eigen/src/Core/Block.h new file mode 100644 index 000000000..11de45c2e --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Block.h @@ -0,0 +1,452 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_BLOCK_H +#define EIGEN_BLOCK_H + +namespace Eigen { + +namespace internal { +template +struct traits > : traits +{ + typedef typename traits::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + enum{ + MatrixRows = traits::RowsAtCompileTime, + MatrixCols = traits::ColsAtCompileTime, + RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows, + ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols, + MaxRowsAtCompileTime = BlockRows==0 ? 0 + : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) + : int(traits::MaxRowsAtCompileTime), + MaxColsAtCompileTime = BlockCols==0 ? 0 + : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) + : int(traits::MaxColsAtCompileTime), + + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : XprTypeIsRowMajor, + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, + // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. 
Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 + }; +}; + +template::ret> class BlockImpl_dense; + +} // end namespace internal + +template class BlockImpl; + +/** \class Block + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size block + * + * \tparam XprType the type of the expression in which we are taking a block + * \tparam BlockRows the number of rows of the block we are taking at compile time (optional) + * \tparam BlockCols the number of columns of the block we are taking at compile time (optional) + * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. + * + * This class represents an expression of either a fixed-size or dynamic-size block. It is the return + * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly maniputate block expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_Block.cpp + * Output: \verbinclude class_Block.out + * + * \note Even though this expression has dynamic size, in the case where \a XprType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. 
+ * + * Here is an example illustrating the fixed-size case: + * \include class_FixedBlock.cpp + * Output: \verbinclude class_FixedBlock.out + * + * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock + */ +template class Block + : public BlockImpl::StorageKind> +{ + typedef BlockImpl::StorageKind> Impl; + public: + //typedef typename Impl::Base Base; + typedef Impl Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Block) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all::type NestedExpression; + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, Index i) : Impl(xpr,i) + { + eigen_assert( (i>=0) && ( + ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && i= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows() + && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols()); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) + { + eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) + && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); + eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows + && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols); + } +}; + +// The generic default implementation for dense block simplu forward to the internal::BlockImpl_dense +// that must be specialized for direct and non-direct access... 
+template +class BlockImpl + : public internal::BlockImpl_dense +{ + typedef internal::BlockImpl_dense Impl; + typedef typename XprType::StorageIndex StorageIndex; + public: + typedef Impl Base; + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {} + EIGEN_DEVICE_FUNC + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) {} +}; + +namespace internal { + +/** \internal Internal implementation of dense Blocks in the general case. */ +template class BlockImpl_dense + : public internal::dense_xpr_base >::type +{ + typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + // class InnerIterator; // FIXME apparently never used + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index i) + : m_xpr(xpr), + // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, + // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1, + // all other cases are invalid. + // The case a 1x1 matrix seems ambiguous, but the result is the same anyway. + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0), + m_blockRows(BlockRows==1 ? 1 : xpr.rows()), + m_blockCols(BlockCols==1 ? 
1 : xpr.cols()) + {} + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), + m_blockRows(BlockRows), m_blockCols(BlockCols) + {} + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), + m_blockRows(blockRows), m_blockCols(blockCols) + {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index rowId, Index colId) + { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const + { + return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index index) + { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + EIGEN_DEVICE_FUNC + inline const CoeffReturnType coeff(Index index) const + { + return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? 
index : 0)); + } + + template + inline PacketScalar packet(Index rowId, Index colId) const + { + return m_xpr.template packet(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + template + inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + m_xpr.template writePacket(rowId + m_startRow.value(), colId + m_startCol.value(), val); + } + + template + inline PacketScalar packet(Index index) const + { + return m_xpr.template packet + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template + inline void writePacket(Index index, const PacketScalar& val) + { + m_xpr.template writePacket + (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); + } + + #ifdef EIGEN_PARSED_BY_DOXYGEN + /** \sa MapBase::data() */ + EIGEN_DEVICE_FUNC inline const Scalar* data() const; + EIGEN_DEVICE_FUNC inline Index innerStride() const; + EIGEN_DEVICE_FUNC inline Index outerStride() const; + #endif + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const + { + return m_xpr; + } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } + + EIGEN_DEVICE_FUNC + StorageIndex startRow() const + { + return m_startRow.value(); + } + + EIGEN_DEVICE_FUNC + StorageIndex startCol() const + { + return m_startCol.value(); + } + + protected: + + XprTypeNested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; +}; + +/** \internal Internal implementation of dense Blocks in the direct access case.*/ +template +class BlockImpl_dense + : public MapBase > +{ + typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + enum { + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) 
!= 0 + }; + public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index i) + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), + BlockRows==1 ? 1 : xpr.rows(), + BlockCols==1 ? 1 : xpr.cols()), + m_xpr(xpr), + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0) + { + init(); + } + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) + { + init(); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, + Index startRow, Index startCol, + Index blockRows, Index blockCols) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) + { + init(); + } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const + { + return m_xpr; + } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } + + /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return internal::traits::HasSameStorageOrderAsXprType + ? 
m_xpr.innerStride() + : m_xpr.outerStride(); + } + + /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return m_outerStride; + } + + EIGEN_DEVICE_FUNC + StorageIndex startRow() const + { + return m_startRow.value(); + } + + EIGEN_DEVICE_FUNC + StorageIndex startCol() const + { + return m_startCol.value(); + } + + #ifndef __SUNPRO_CC + // FIXME sunstudio is not friendly with the above friend... + // META-FIXME there is no 'friend' keyword around here. Is this obsolete? + protected: + #endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal used by allowAligned() */ + EIGEN_DEVICE_FUNC + inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) + : Base(data, blockRows, blockCols), m_xpr(xpr) + { + init(); + } + #endif + + protected: + EIGEN_DEVICE_FUNC + void init() + { + m_outerStride = internal::traits::HasSameStorageOrderAsXprType + ? m_xpr.outerStride() + : m_xpr.innerStride(); + } + + XprTypeNested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + Index m_outerStride; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BLOCK_H diff --git a/thirdparty/eigen/Eigen/src/Core/BooleanRedux.h b/thirdparty/eigen/Eigen/src/Core/BooleanRedux.h new file mode 100644 index 000000000..8409d8749 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/BooleanRedux.h @@ -0,0 +1,164 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ALLANDANY_H +#define EIGEN_ALLANDANY_H + +namespace Eigen { + +namespace internal { + +template +struct all_unroller +{ + typedef typename Derived::ExpressionTraits Traits; + enum { + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime + }; + + static inline bool run(const Derived &mat) + { + return all_unroller::run(mat) && mat.coeff(row, col); + } +}; + +template +struct all_unroller +{ + static inline bool run(const Derived &/*mat*/) { return true; } +}; + +template +struct all_unroller +{ + static inline bool run(const Derived &) { return false; } +}; + +template +struct any_unroller +{ + typedef typename Derived::ExpressionTraits Traits; + enum { + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime + }; + + static inline bool run(const Derived &mat) + { + return any_unroller::run(mat) || mat.coeff(row, col); + } +}; + +template +struct any_unroller +{ + static inline bool run(const Derived & /*mat*/) { return false; } +}; + +template +struct any_unroller +{ + static inline bool run(const Derived &) { return false; } +}; + +} // end namespace internal + +/** \returns true if all coefficients are true + * + * Example: \include MatrixBase_all.cpp + * Output: \verbinclude MatrixBase_all.out + * + * \sa any(), Cwise::operator<() + */ +template +inline bool DenseBase::all() const +{ + typedef internal::evaluator Evaluator; + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + }; + Evaluator evaluator(derived()); + if(unroll) + return internal::all_unroller::run(evaluator); + else + { + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if (!evaluator.coeff(i, j)) return false; + return true; + } +} + +/** \returns true if at least one coefficient is true + * + * \sa all() + */ +template +inline bool DenseBase::any() const +{ + typedef 
internal::evaluator Evaluator; + enum { + unroll = SizeAtCompileTime != Dynamic + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + }; + Evaluator evaluator(derived()); + if(unroll) + return internal::any_unroller::run(evaluator); + else + { + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if (evaluator.coeff(i, j)) return true; + return false; + } +} + +/** \returns the number of coefficients which evaluate to true + * + * \sa all(), any() + */ +template +inline Eigen::Index DenseBase::count() const +{ + return derived().template cast().template cast().sum(); +} + +/** \returns true is \c *this contains at least one Not A Number (NaN). + * + * \sa allFinite() + */ +template +inline bool DenseBase::hasNaN() const +{ +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isNaN().any(); +#else + return !((derived().array()==derived().array()).all()); +#endif +} + +/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. + * + * \sa hasNaN() + */ +template +inline bool DenseBase::allFinite() const +{ +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else + return !((derived()-derived()).hasNaN()); +#endif +} + +} // end namespace Eigen + +#endif // EIGEN_ALLANDANY_H diff --git a/thirdparty/eigen/Eigen/src/Core/CommaInitializer.h b/thirdparty/eigen/Eigen/src/Core/CommaInitializer.h new file mode 100644 index 000000000..d218e9814 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CommaInitializer.h @@ -0,0 +1,160 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_COMMAINITIALIZER_H +#define EIGEN_COMMAINITIALIZER_H + +namespace Eigen { + +/** \class CommaInitializer + * \ingroup Core_Module + * + * \brief Helper class used by the comma initializer operator + * + * This class is internally used to implement the comma initializer feature. It is + * the return type of MatrixBase::operator<<, and most of the time this is the only + * way it is used. + * + * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() + */ +template +struct CommaInitializer +{ + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC + inline CommaInitializer(XprType& xpr, const Scalar& s) + : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) + { + m_xpr.coeffRef(0,0) = s; + } + + template + EIGEN_DEVICE_FUNC + inline CommaInitializer(XprType& xpr, const DenseBase& other) + : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) + { + m_xpr.block(0, 0, other.rows(), other.cols()) = other; + } + + /* Copy/Move constructor which transfers ownership. This is crucial in + * absence of return value optimization to avoid assertions during destruction. */ + // FIXME in C++11 mode this could be replaced by a proper RValue constructor + EIGEN_DEVICE_FUNC + inline CommaInitializer(const CommaInitializer& o) + : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { + // Mark original object as finished. 
In absence of R-value references we need to const_cast: + const_cast(o).m_row = m_xpr.rows(); + const_cast(o).m_col = m_xpr.cols(); + const_cast(o).m_currentBlockRows = 0; + } + + /* inserts a scalar value in the target matrix */ + EIGEN_DEVICE_FUNC + CommaInitializer& operator,(const Scalar& s) + { + if (m_col==m_xpr.cols()) + { + m_row+=m_currentBlockRows; + m_col = 0; + m_currentBlockRows = 1; + eigen_assert(m_row + EIGEN_DEVICE_FUNC + CommaInitializer& operator,(const DenseBase& other) + { + if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows)) + { + m_row+=m_currentBlockRows; + m_col = 0; + m_currentBlockRows = other.rows(); + eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() + && "Too many rows passed to comma initializer (operator<<)"); + } + eigen_assert((m_col + other.cols() <= m_xpr.cols()) + && "Too many coefficients passed to comma initializer (operator<<)"); + eigen_assert(m_currentBlockRows==other.rows()); + m_xpr.template block + (m_row, m_col, other.rows(), other.cols()) = other; + m_col += other.cols(); + return *this; + } + + EIGEN_DEVICE_FUNC + inline ~CommaInitializer() +#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS + EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) +#endif + { + finished(); + } + + /** \returns the built matrix once all its coefficients have been set. + * Calling finished is 100% optional. 
Its purpose is to write expressions + * like this: + * \code + * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); + * \endcode + */ + EIGEN_DEVICE_FUNC + inline XprType& finished() { + eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) + && m_col == m_xpr.cols() + && "Too few coefficients passed to comma initializer (operator<<)"); + return m_xpr; + } + + XprType& m_xpr; // target expression + Index m_row; // current row id + Index m_col; // current col id + Index m_currentBlockRows; // current block height +}; + +/** \anchor MatrixBaseCommaInitRef + * Convenient operator to set the coefficients of a matrix. + * + * The coefficients must be provided in a row major order and exactly match + * the size of the matrix. Otherwise an assertion is raised. + * + * Example: \include MatrixBase_set.cpp + * Output: \verbinclude MatrixBase_set.out + * + * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary order. + * + * \sa CommaInitializer::finished(), class CommaInitializer + */ +template +inline CommaInitializer DenseBase::operator<< (const Scalar& s) +{ + return CommaInitializer(*static_cast(this), s); +} + +/** \sa operator<<(const Scalar&) */ +template +template +inline CommaInitializer +DenseBase::operator<<(const DenseBase& other) +{ + return CommaInitializer(*static_cast(this), other); +} + +} // end namespace Eigen + +#endif // EIGEN_COMMAINITIALIZER_H diff --git a/thirdparty/eigen/Eigen/src/Core/ConditionEstimator.h b/thirdparty/eigen/Eigen/src/Core/ConditionEstimator.h new file mode 100644 index 000000000..aa7efdc76 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/ConditionEstimator.h @@ -0,0 +1,175 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONDITIONESTIMATOR_H +#define EIGEN_CONDITIONESTIMATOR_H + +namespace Eigen { + +namespace internal { + +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + const RealVector v_abs = v.cwiseAbs(); + return (v_abs.array() == static_cast(0)) + .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs)); + } +}; + +// Partial specialization to avoid elementwise division for real vectors. +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + return (v.array() < static_cast(0)) + .select(-Vector::Ones(v.size()), Vector::Ones(v.size())); + } +}; + +/** + * \returns an estimate of ||inv(matrix)||_1 given a decomposition of + * \a matrix that implements .solve() and .adjoint().solve() methods. + * + * This function implements Algorithms 4.1 and 5.1 from + * http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf + * which also forms the basis for the condition number estimators in + * LAPACK. Since at most 10 calls to the solve method of dec are + * performed, the total cost is O(dims^2), as opposed to O(dims^3) + * needed to compute the inverse matrix explicitly. + * + * The most common usage is in estimating the condition number + * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be + * computed directly in O(n^2) operations. + * + * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and + * LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. 
+ */ +template +typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec) +{ + typedef typename Decomposition::MatrixType MatrixType; + typedef typename Decomposition::Scalar Scalar; + typedef typename Decomposition::RealScalar RealScalar; + typedef typename internal::plain_col_type::type Vector; + typedef typename internal::plain_col_type::type RealVector; + const bool is_complex = (NumTraits::IsComplex != 0); + + eigen_assert(dec.rows() == dec.cols()); + const Index n = dec.rows(); + if (n == 0) + return 0; + + // Disable Index to float conversion warning +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + Vector v = dec.solve(Vector::Ones(n) / Scalar(n)); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif + + // lower_bound is a lower bound on + // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 + // and is the objective maximized by the ("super-") gradient ascent + // algorithm below. + RealScalar lower_bound = v.template lpNorm<1>(); + if (n == 1) + return lower_bound; + + // Gradient ascent algorithm follows: We know that the optimum is achieved at + // one of the simplices v = e_i, so in each iteration we follow a + // super-gradient to move towards the optimal one. + RealScalar old_lower_bound = lower_bound; + Vector sign_vector(n); + Vector old_sign_vector; + Index v_max_abs_index = -1; + Index old_v_max_abs_index = v_max_abs_index; + for (int k = 0; k < 4; ++k) + { + sign_vector = internal::rcond_compute_sign::run(v); + if (k > 0 && !is_complex && sign_vector == old_sign_vector) { + // Break if the solution stagnated. + break; + } + // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )| + v = dec.adjoint().solve(sign_vector); + v.real().cwiseAbs().maxCoeff(&v_max_abs_index); + if (v_max_abs_index == old_v_max_abs_index) { + // Break if the solution stagnated. + break; + } + // Move to the new simplex e_j, where j = v_max_abs_index. 
+ v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j. + lower_bound = v.template lpNorm<1>(); + if (lower_bound <= old_lower_bound) { + // Break if the gradient step did not increase the lower_bound. + break; + } + if (!is_complex) { + old_sign_vector = sign_vector; + } + old_v_max_abs_index = v_max_abs_index; + old_lower_bound = lower_bound; + } + // The following calculates an independent estimate of ||matrix||_1 by + // multiplying matrix by a vector with entries of slowly increasing + // magnitude and alternating sign: + // v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1. + // This improvement to Hager's algorithm above is due to Higham. It was + // added to make the algorithm more robust in certain corner cases where + // large elements in the matrix might otherwise escape detection due to + // exact cancellation (especially when op and op_adjoint correspond to a + // sequence of backsubstitutions and permutations), which could cause + // Hager's algorithm to vastly underestimate ||matrix||_1. + Scalar alternating_sign(RealScalar(1)); + for (Index i = 0; i < n; ++i) { + // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates + v[i] = alternating_sign * static_cast(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); + alternating_sign = -alternating_sign; + } + v = dec.solve(v); + const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n)); + return numext::maxi(lower_bound, alternate_lower_bound); +} + +/** \brief Reciprocal condition number estimator. + * + * Computing a decomposition of a dense matrix takes O(n^3) operations, while + * this method estimates the condition number quickly and reliably in O(n^2) + * operations. + * + * \returns an estimate of the reciprocal condition number + * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and + * its decomposition. 
Supports the following decompositions: FullPivLU, + * PartialPivLU, LDLT, and LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. + */ +template +typename Decomposition::RealScalar +rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec) +{ + typedef typename Decomposition::RealScalar RealScalar; + eigen_assert(dec.rows() == dec.cols()); + if (dec.rows() == 0) return RealScalar(1); + if (matrix_norm == RealScalar(0)) return RealScalar(0); + if (dec.rows() == 1) return RealScalar(1); + const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); + return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0) + : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); +} + +} // namespace internal + +} // namespace Eigen + +#endif diff --git a/thirdparty/eigen/Eigen/src/Core/CoreEvaluators.h b/thirdparty/eigen/Eigen/src/Core/CoreEvaluators.h new file mode 100644 index 000000000..1d14af652 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CoreEvaluators.h @@ -0,0 +1,1673 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. 
+template struct storage_kind_to_shape; + +template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; +template<> struct storage_kind_to_shape { typedef SolverShape Shape; }; +template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; +template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; }; + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. 
+// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename Arg1Kind = typename evaluator_traits::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> struct ternary_evaluator; + +template< typename T, + typename LhsKind = typename evaluator_traits::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits contains traits for evaluator + +template +struct evaluator_traits_base +{ + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; +}; + +// Default evaluator traits +template +struct evaluator_traits : public evaluator_traits_base +{ +}; + +template::Shape > +struct evaluator_assume_aliasing { + static const bool value = false; +}; + +// By default, we assume a unary expression: +template +struct evaluator : public unary_evaluator +{ + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} +}; + + +// TODO: Think about const-correctness +template +struct evaluator + : evaluator +{ + EIGEN_DEVICE_FUNC + explicit evaluator(const T& xpr) : evaluator(xpr) {} +}; + +// ---------- base class for all evaluators ---------- + +template +struct evaluator_base : public noncopyable +{ + // TODO that's not very nice to have to propagate all these traits. 
They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; + + enum { + Alignment = 0 + }; +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator is a common base class for the +// Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. + +template +struct evaluator > + : evaluator_base +{ + typedef PlainObjectBase PlainObjectType; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment + }; + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + if (IsRowMajor) + return m_data[row * m_outerStride.value() + col]; + else + return m_data[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_data[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + if (IsRowMajor) + return const_cast(m_data)[row * m_outerStride.value() + col]; + else + return const_cast(m_data)[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return const_cast(m_data)[index]; + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + if (IsRowMajor) + return ploadt(m_data + row * m_outerStride.value() + col); + else + return ploadt(m_data + row + col * m_outerStride.value()); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return ploadt(m_data + index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + if (IsRowMajor) + return pstoret + (const_cast(m_data) + row * m_outerStride.value() + col, x); + else + return pstoret + (const_cast(m_data) + row + col * m_outerStride.value(), x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + return pstoret(const_cast(m_data) + index, x); + } + +protected: + const Scalar *m_data; + + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Matrix XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Array XprType; + 
+ EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +// -------------------- Transpose -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename XprType::Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(col, row); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(col, row, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +// -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. 
+ +template::value, + bool has_unary = has_unary_operator::value, + bool has_binary = has_binary_operator::value> +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp(); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp(i,j); } +}; + +// We need the following specialization for vector-only functors assigned to a runtime vector, +// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd. +// In this case, i==0 and j is used for the actual iteration. 
+template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op(i+j); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op.template packetOp(i+j); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper {}; + +#if 0 && EIGEN_COMP_MSVC>0 +// Disable this ugly workaround. This is now handled in traits::match, +// but this piece of code might still become handly if some other weird compilation +// erros pop up again. + +// MSVC exhibits a weird compilation error when +// compiling: +// Eigen::MatrixXf A = MatrixXf::Random(3,3); +// Ref R = 2.f*A; +// and that has_*ary_operator> have not been instantiated yet. +// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> +// and at that time has_*ary_operator returns true regardless of T. +// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. +// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), +// and packet() are really instantiated as implemented below: + +// This is a simple wrapper around Index to enforce the re-instantiation of +// has_*ary_operator when needed. 
+template struct nullary_wrapper_workaround_msvc { + nullary_wrapper_workaround_msvc(const T&); + operator T()const; +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i); + } +}; +#endif // MSVC workaround + +template +struct evaluator > + : evaluator_base > +{ + typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 
0 : EvalBeforeNestingBit), + Alignment = AlignedMax + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) + : m_functor(n.functor()), m_wrapper() + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(IndexType row, IndexType col) const + { + return m_wrapper(m_functor, row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(IndexType index) const + { + return m_wrapper(m_functor,index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(IndexType row, IndexType col) const + { + return m_wrapper.template packetOp(m_functor, row, col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(IndexType index) const + { + return m_wrapper.template packetOp(m_functor, index); + } + +protected: + const NullaryOp m_functor; + const internal::nullary_wrapper m_wrapper; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct unary_evaluator, IndexBased > + : evaluator_base > +{ + typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_argImpl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + +protected: + const UnaryOp m_functor; + evaluator m_argImpl; +}; + +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator > + : public ternary_evaluator > +{ + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && is_same::value, + StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & ( + HereditaryBits + | 
(int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN( + EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + +protected: + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; +}; + +// -------------------- CwiseBinaryOp -------------------- + +// this is a binary expression +template +struct evaluator > + : public binary_evaluator > +{ + typedef CwiseBinaryOp XprType; + typedef binary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const 
XprType& xpr) : Base(xpr) {} +}; + +template +struct binary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); + } + +protected: + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; +}; + +// -------------------- CwiseUnaryView 
-------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_unaryOp(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_unaryOp(m_argImpl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_unaryOp(m_argImpl.coeffRef(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_unaryOp(m_argImpl.coeffRef(index)); + } + +protected: + const UnaryOp m_unaryOp; + evaluator m_argImpl; +}; + +// -------------------- Map -------------------- + +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? 
+// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base +{ + typedef Derived XprType; + typedef typename XprType::PointerType PointerType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost + }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_innerStride(map.innerStride()), + m_outerStride(map.outerStride()) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_data[index * m_innerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_data[index * m_innerStride.value()]; + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return internal::ploadt(ptr); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return internal::ploadt(m_data + index * m_innerStride.value()); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return 
internal::pstoret(ptr, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + internal::pstoret(m_data + index * m_innerStride.value(), x); + } +protected: + EIGEN_DEVICE_FUNC + inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } + EIGEN_DEVICE_FUNC + inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + + PointerType m_data; + const internal::variable_if_dynamic m_innerStride; + const internal::variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Map XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? 
~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), + + Alignment = int(MapOptions)&int(AlignedMask) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator(map) + { } +}; + +// -------------------- Ref -------------------- + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Ref XprType; + + enum { + Flags = evaluator >::Flags, + Alignment = evaluator >::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) + { } +}; + +// -------------------- Block -------------------- + +template::ret> struct block_evaluator; + +template +struct evaluator > + : block_evaluator +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? 
PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) + }; + typedef block_evaluator block_evaluator_type; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +// no direct-access => dispatch to a unary evaluator +template +struct block_evaluator + : unary_evaluator > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator(block) + {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? 
index : 0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); + } + +protected: + evaluator m_argImpl; + const variable_if_dynamic m_startRow; + const variable_if_dynamic m_startCol; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. 
+ +template +struct block_evaluator + : mapbase_evaluator, + typename Block::PlainObject> +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) + { + // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime + eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + } +}; + + +// -------------------- Select -------------------- +// NOTE shall we introduce a ternary_evaluator? + +// TODO enable vectorization for Select +template +struct evaluator > + : evaluator_base > +{ + typedef Select XprType; + enum { + CoeffReadCost = evaluator::CoeffReadCost + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), + + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + : m_conditionImpl(select.conditionMatrix()), + m_thenImpl(select.thenMatrix()), + m_elseImpl(select.elseMatrix()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + if (m_conditionImpl.coeff(row, col)) + return m_thenImpl.coeff(row, col); + else + return m_elseImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + if (m_conditionImpl.coeff(index)) + return m_thenImpl.coeff(index); + else + return m_elseImpl.coeff(index); + } + +protected: + evaluator m_conditionImpl; + evaluator m_thenImpl; + evaluator m_elseImpl; +}; + + +// -------------------- Replicate -------------------- + +template +struct unary_evaluator > + : evaluator_base > +{ + 
typedef Replicate XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.coeff(actual_row, actual_col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.coeff(actual_index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? 
col + : col % m_cols.value(); + + return m_argImpl.template packet(actual_row, actual_col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.template packet(actual_index); + } + +protected: + const ArgTypeNested m_arg; + evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- PartialReduxExpr -------------------- + +template< typename ArgType, typename MemberOp, int Direction> +struct evaluator > + : evaluator_base > +{ + typedef PartialReduxExpr XprType; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? HugeCost + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit, + + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) + : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? 
HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index i, Index j) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(j)); + else + return m_functor(m_arg.row(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index index) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(index)); + else + return m_functor(m_arg.row(index)); + } + +protected: + typename internal::add_const_on_value_type::type m_arg; + const MemberOp m_functor; +}; + + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. + +template +struct evaluator_wrapper_base + : evaluator_base +{ + typedef typename remove_all::type ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(row, col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + 
{ + return m_argImpl.template packet(index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(row, col, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef MatrixWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef ArrayWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template struct reverse_packet_cond; + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Reverse XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1)) + ? 
LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + return reverse_packet::run(m_argImpl.template packet( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? 
m_cols.value() - col - OffsetCol : col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + enum { PacketSize = unpacket_traits::size }; + return preverse(m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + // FIXME we could factorize some code with packet(i,j) + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + m_argImpl.template writePacket( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + enum { PacketSize = unpacket_traits::size }; + m_argImpl.template writePacket + (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); + } + +protected: + evaluator m_argImpl; + + // If we do not reverse rows, then we do not need to know the number of rows; same for columns + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. 
+ const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- Diagonal -------------------- + +template +struct evaluator > + : evaluator_base > +{ + typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), + m_index(diagonal.index()) + { } + + typedef typename XprType::Scalar Scalar; + // FIXME having to check whether ArgType is sparse here i not very nice. + typedef typename internal::conditional::value, + typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index) const + { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index) + { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + +protected: + evaluator m_argImpl; + const internal::variable_if_dynamicindex m_index; + +private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? 
m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template class EvalToTemp; + +template +struct traits > + : public traits +{ }; + +template +class EvalToTemp + : public dense_xpr_base >::type +{ + public: + + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator > + : public evaluator +{ + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.arg()) + { + ::new (static_cast(this)) Base(m_result); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg) + { + ::new (static_cast(this)) Base(m_result); + } + +protected: + PlainObject m_result; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREEVALUATORS_H diff --git a/thirdparty/eigen/Eigen/src/Core/CoreIterators.h b/thirdparty/eigen/Eigen/src/Core/CoreIterators.h new file mode 100644 index 000000000..4eb42b93a --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CoreIterators.h @@ -0,0 +1,127 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COREITERATORS_H +#define EIGEN_COREITERATORS_H + +namespace Eigen { + +/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core + */ + +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. + * + * TODO: add a usage example + */ +template +class InnerIterator +{ +protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef internal::evaluator EvaluatorType; + typedef typename internal::traits::Scalar Scalar; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. 
+ EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template InnerIterator(const EigenBase&,Index outer); +}; + +namespace internal { + +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector +{ +protected: + typedef evaluator EvaluatorType; + typedef typename traits::Scalar Scalar; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? 
index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; +}; + +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector + : public evaluator::InnerIterator +{ +protected: + typedef typename evaluator::InnerIterator Base; + typedef evaluator EvaluatorType; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREITERATORS_H diff --git a/thirdparty/eigen/Eigen/src/Core/CwiseBinaryOp.h b/thirdparty/eigen/Eigen/src/Core/CwiseBinaryOp.h new file mode 100644 index 000000000..9ddbfe286 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -0,0 +1,184 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CWISE_BINARY_OP_H +#define EIGEN_CWISE_BINARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > +{ + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of< + BinaryOp( + const typename Lhs::Scalar&, + const typename Rhs::Scalar& + ) + >::type Scalar; + typedef typename cwise_promote_storage_type::StorageKind, + typename traits::StorageKind, + BinaryOp>::ret StorageKind; + typedef typename promote_index_type::StorageIndex, + typename traits::StorageIndex>::type StorageIndex; + typedef typename Lhs::Nested LhsNested; + typedef typename Rhs::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + enum { + Flags = _LhsNested::Flags & RowMajorBit + }; +}; +} // end namespace internal + +template +class CwiseBinaryOpImpl; + +/** \class CwiseBinaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions + * + * \tparam BinaryOp template functor implementing the operator + * \tparam LhsType the type of the left-hand side + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. 
+ * It is the return type of binary operators, by which we mean only those binary operators where + * both the left-hand side and the right-hand side are Eigen expressions. + * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseBinaryOp types explicitly. + * + * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseBinaryOp : + public CwiseBinaryOpImpl< + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator +{ + public: + + typedef typename internal::remove_all::type Functor; + typedef typename internal::remove_all::type Lhs; + typedef typename internal::remove_all::type Rhs; + + typedef typename CwiseBinaryOpImpl< + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_reference::type _LhsNested; + typedef typename internal::remove_reference::type _RhsNested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) + : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) + { + EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename Rhs::Scalar); + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) + eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time 
optimizations + if (internal::traits::type>::RowsAtCompileTime==Dynamic) + return m_rhs.rows(); + else + return m_lhs.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time optimizations + if (internal::traits::type>::ColsAtCompileTime==Dynamic) + return m_rhs.cols(); + else + return m_lhs.cols(); + } + + /** \returns the left hand side nested expression */ + EIGEN_DEVICE_FUNC + const _LhsNested& lhs() const { return m_lhs; } + /** \returns the right hand side nested expression */ + EIGEN_DEVICE_FUNC + const _RhsNested& rhs() const { return m_rhs; } + /** \returns the functor representing the binary operation */ + EIGEN_DEVICE_FUNC + const BinaryOp& functor() const { return m_functor; } + + protected: + LhsNested m_lhs; + RhsNested m_rhs; + const BinaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseBinaryOpImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +MatrixBase::operator-=(const MatrixBase &other) +{ + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. 
+ * + * \returns a reference to \c *this + */ +template +template +EIGEN_STRONG_INLINE Derived & +MatrixBase::operator+=(const MatrixBase& other) +{ + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/thirdparty/eigen/Eigen/src/Core/CwiseNullaryOp.h b/thirdparty/eigen/Eigen/src/Core/CwiseNullaryOp.h new file mode 100644 index 000000000..dd498f758 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -0,0 +1,866 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_NULLARY_OP_H +#define EIGEN_CWISE_NULLARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > : traits +{ + enum { + Flags = traits::Flags & RowMajorBit + }; +}; + +} // namespace internal + +/** \class CwiseNullaryOp + * \ingroup Core_Module + * + * \brief Generic expression of a matrix where all coefficients are defined by a functor + * + * \tparam NullaryOp template functor implementing the operator + * \tparam PlainObjectType the underlying plain matrix/array type + * + * This class represents an expression of a generic nullary operator. + * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods, + * and most of the time this is the only way it is used. + * + * However, if you want to write a function returning such an expression, you + * will need to use this class. + * + * The functor NullaryOp must expose one of the following method: + + + + +
\c operator()() if the procedural generation does not depend on the coefficient entries (e.g., random numbers)
\c operator()(Index i) if the procedural generation makes sense for vectors only and depends on the coefficient index \c i (e.g., linspace)
\c operator()(Index i,Index j) if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)
+ * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors. + * + * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding + * C++11 random number generators. + * + * A nullary expression can also be used to implement custom sophisticated matrix manipulations + * that cannot be covered by the existing set of natively supported matrix manipulations. + * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations + * on the behavior of CwiseNullaryOp. + * + * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr + */ +template +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) + + EIGEN_DEVICE_FUNC + CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + : m_rows(rows), m_cols(cols), m_functor(func) + { + eigen_assert(rows >= 0 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } + + /** \returns the functor representing the nullary operation */ + EIGEN_DEVICE_FUNC + const NullaryOp& functor() const { return m_functor; } + + protected: + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; + const NullaryOp m_functor; +}; + + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. 
+ * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) +{ + return CwiseNullaryOp(rows, cols, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * Here is an example with C++11 random generators: \include random_cpp11.cpp + * Output: \verbinclude random_cpp11.out + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. 
+ * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> +DenseBase::NullaryExpr(const CustomNullaryOp& func) +{ + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this DenseBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Constant(const Scalar&) should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index rows, Index cols, const Scalar& value) +{ + return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this DenseBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Constant(const Scalar&) should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index size, const Scalar& value) +{ + return DenseBase::NullaryExpr(size, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. 
+ * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(const Scalar& value) +{ + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op(value)); +} + +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&) + * + * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); +} + +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&) + * + * \sa LinSpaced(Scalar,Scalar) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. 
+ * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced.cpp + * Output: \verbinclude DenseBase_LinSpaced.out + * + * For integer scalar types, an even spacing is possible if and only if the length of the range, + * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the + * number of values \c high-low+1 (meaning each value can be repeated the same number of times). + * If one of these two conditions is not satisfied, then \c high is lowered to the largest value + * satisfying one of these constraints. + * Here are some examples: + * + * Example: \include DenseBase_LinSpacedInt.cpp + * Output: \verbinclude DenseBase_LinSpacedInt.out + * + * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); +} + +/** + * \copydoc DenseBase::LinSpaced(Index, const Scalar&, const Scalar&) + * Special version for fixed size types which does not require the size parameter. 
+ */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); +} + +/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ +template +bool DenseBase::isApproxToConstant +(const Scalar& val, const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if(!internal::isApprox(self.coeff(i, j), val, prec)) + return false; + return true; +} + +/** This is just an alias for isApproxToConstant(). + * + * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ +template +bool DenseBase::isConstant +(const Scalar& val, const RealScalar& prec) const +{ + return isApproxToConstant(val, prec); +} + +/** Alias for setConstant(): sets all coefficients in this expression to \a val. + * + * \sa setConstant(), Constant(), class CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) +{ + setConstant(val); +} + +/** Sets all coefficients in this expression to value \a val. + * + * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) +{ + return derived() = Constant(rows(), cols(), val); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. 
+ * + * \only_for_vectors + * + * Example: \include Matrix_setConstant_int.cpp + * Output: \verbinclude Matrix_setConstant_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index size, const Scalar& val) +{ + resize(size); + return setConstant(val); +} + +/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val. + * + * \param rows the new number of rows + * \param cols the new number of columns + * \param val the value to which all coefficients are set + * + * Example: \include Matrix_setConstant_int_int.cpp + * Output: \verbinclude Matrix_setConstant_int_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) +{ + resize(rows, cols); + return setConstant(val); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_setLinSpaced.cpp + * Output: \verbinclude DenseBase_setLinSpaced.out + * + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. 
+ * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function fills \c *this with equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. + * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return setLinSpaced(size(), low, high); +} + +// zero: + +/** \returns an expression of a zero matrix. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int_int.cpp + * Output: \verbinclude MatrixBase_zero_int_int.out + * + * \sa Zero(), Zero(Index) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index rows, Index cols) +{ + return Constant(rows, cols, Scalar(0)); +} + +/** \returns an expression of a zero vector. + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. 
+ * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int.cpp + * Output: \verbinclude MatrixBase_zero_int.out + * + * \sa Zero(), Zero(Index,Index) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index size) +{ + return Constant(size, Scalar(0)); +} + +/** \returns an expression of a fixed-size zero matrix or vector. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_zero.cpp + * Output: \verbinclude MatrixBase_zero.out + * + * \sa Zero(Index), Zero(Index,Index) + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero() +{ + return Constant(Scalar(0)); +} + +/** \returns true if *this is approximately equal to the zero matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isZero.cpp + * Output: \verbinclude MatrixBase_isZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +bool DenseBase::isZero(const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < rows(); ++i) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) + return false; + return true; +} + +/** Sets all coefficients in this expression to zero. + * + * Example: \include MatrixBase_setZero.cpp + * Output: \verbinclude MatrixBase_setZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setZero() +{ + return setConstant(Scalar(0)); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to zero. 
+ * + * \only_for_vectors + * + * Example: \include Matrix_setZero_int.cpp + * Output: \verbinclude Matrix_setZero_int.out + * + * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index newSize) +{ + resize(newSize); + return setConstant(Scalar(0)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to zero. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setZero_int_int.cpp + * Output: \verbinclude Matrix_setZero_int_int.out + * + * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index rows, Index cols) +{ + resize(rows, cols); + return setConstant(Scalar(0)); +} + +// ones: + +/** \returns an expression of a matrix where all coefficients equal one. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int_int.cpp + * Output: \verbinclude MatrixBase_ones_int_int.out + * + * \sa Ones(), Ones(Index), isOnes(), class Ones + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index rows, Index cols) +{ + return Constant(rows, cols, Scalar(1)); +} + +/** \returns an expression of a vector where all coefficients equal one. + * + * The parameter \a newSize is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. 
For fixed-size types, + * it is redundant to pass \a size as argument, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int.cpp + * Output: \verbinclude MatrixBase_ones_int.out + * + * \sa Ones(), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index newSize) +{ + return Constant(newSize, Scalar(1)); +} + +/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_ones.cpp + * Output: \verbinclude MatrixBase_ones.out + * + * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones() +{ + return Constant(Scalar(1)); +} + +/** \returns true if *this is approximately equal to the matrix where all coefficients + * are equal to 1, within the precision given by \a prec. + * + * Example: \include MatrixBase_isOnes.cpp + * Output: \verbinclude MatrixBase_isOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +bool DenseBase::isOnes +(const RealScalar& prec) const +{ + return isApproxToConstant(Scalar(1), prec); +} + +/** Sets all coefficients in this expression to one. + * + * Example: \include MatrixBase_setOnes.cpp + * Output: \verbinclude MatrixBase_setOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() +{ + return setConstant(Scalar(1)); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to one. 
+ * + * \only_for_vectors + * + * Example: \include Matrix_setOnes_int.cpp + * Output: \verbinclude Matrix_setOnes_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index newSize) +{ + resize(newSize); + return setConstant(Scalar(1)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to one. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setOnes_int_int.cpp + * Output: \verbinclude Matrix_setOnes_int_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index rows, Index cols) +{ + resize(rows, cols); + return setConstant(Scalar(1)); +} + +// Identity: + +/** \returns an expression of the identity matrix (not necessarily square). + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used + * instead. + * + * Example: \include MatrixBase_identity_int_int.cpp + * Output: \verbinclude MatrixBase_identity_int_int.out + * + * \sa Identity(), setIdentity(), isIdentity() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity(Index rows, Index cols) +{ + return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); +} + +/** \returns an expression of the identity matrix (not necessarily square). + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variant taking size arguments. 
+ * + * Example: \include MatrixBase_identity.cpp + * Output: \verbinclude MatrixBase_identity.out + * + * \sa Identity(Index,Index), setIdentity(), isIdentity() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity() +{ + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return MatrixBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op()); +} + +/** \returns true if *this is approximately equal to the identity matrix + * (not necessarily square), + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isIdentity.cpp + * Output: \verbinclude MatrixBase_isIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity() + */ +template +bool MatrixBase::isIdentity +(const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index j = 0; j < cols(); ++j) + { + for(Index i = 0; i < rows(); ++i) + { + if(i == j) + { + if(!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) + return false; + } + else + { + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) + return false; + } + } + } + return true; +} + +namespace internal { + +template=16)> +struct setIdentity_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Derived& run(Derived& m) + { + return m = Derived::Identity(m.rows(), m.cols()); + } +}; + +template +struct setIdentity_impl +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Derived& run(Derived& m) + { + m.setZero(); + const Index size = numext::mini(m.rows(), m.cols()); + for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); + return m; + } +}; + +} // end namespace internal + +/** Writes the identity expression (not necessarily square) into *this. 
+ * + * Example: \include MatrixBase_setIdentity.cpp + * Output: \verbinclude MatrixBase_setIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() + */ +template +EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() +{ + return internal::setIdentity_impl::run(derived()); +} + +/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setIdentity_int_int.cpp + * Output: \verbinclude Matrix_setIdentity_int_int.out + * + * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() + */ +template +EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) +{ + derived().resize(rows, cols); + return setIdentity(); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * This variant is for fixed-size vector only. 
+ * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(),i); +} + +/** \returns an expression of the X axis unit vector (1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() +{ return Derived::Unit(0); } + +/** \returns an expression of the Y axis unit vector (0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() +{ return Derived::Unit(1); } + +/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() +{ return Derived::Unit(2); } + +/** \returns an expression of the W axis unit vector (0,0,0,1) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() +{ return Derived::Unit(3); } + +} // end namespace Eigen + +#endif // EIGEN_CWISE_NULLARY_OP_H diff --git a/thirdparty/eigen/Eigen/src/Core/CwiseTernaryOp.h b/thirdparty/eigen/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 000000000..9f3576fec --- /dev/null +++ 
b/thirdparty/eigen/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,197 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_TERNARY_OP_H +#define EIGEN_CWISE_TERNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Arg1, Arg2, and Arg3 to have the same scalar type + // (see CwiseTernaryOp constructor), + // we still want to handle the case when the result type is different. 
+ typedef typename result_of::type Scalar; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + typedef typename Arg1::Nested Arg1Nested; + typedef typename Arg2::Nested Arg2Nested; + typedef typename Arg3::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + enum { Flags = _Arg1Nested::Flags & RowMajorBit }; +}; +} // end namespace internal + +template +class CwiseTernaryOpImpl; + +/** \class CwiseTernaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise ternary operator is + * applied to three expressions + * + * \tparam TernaryOp template functor implementing the operator + * \tparam Arg1Type the type of the first argument + * \tparam Arg2Type the type of the second argument + * \tparam Arg3Type the type of the third argument + * + * This class represents an expression where a coefficient-wise ternary + * operator is applied to three expressions. + * It is the return type of ternary operators, by which we mean only those + * ternary operators where + * all three arguments are Eigen expressions. + * For example, the return type of betainc(matrix1, matrix2, matrix3) is a + * CwiseTernaryOp. + * + * Most of the time, this is the only way that it is used, so you typically + * don't have to name + * CwiseTernaryOp types explicitly. 
+ * + * \sa MatrixBase::ternaryExpr(const MatrixBase &, const + * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp, + * class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseTernaryOp : public CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>, + internal::no_assignment_operator +{ + public: + typedef typename internal::remove_all::type Arg1; + typedef typename internal::remove_all::type Arg2; + typedef typename internal::remove_all::type Arg3; + + typedef typename CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp) + + typedef typename internal::ref_selector::type Arg1Nested; + typedef typename internal::ref_selector::type Arg2Nested; + typedef typename internal::ref_selector::type Arg3Nested; + typedef typename internal::remove_reference::type _Arg1Nested; + typedef typename internal::remove_reference::type _Arg2Nested; + typedef typename internal::remove_reference::type _Arg3Nested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, + const Arg3& a3, + const TernaryOp& func = TernaryOp()) + : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && + a1.rows() == a3.rows() && a1.cols() == a3.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // 
return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg3.rows(); + else if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg2.rows(); + else + return m_arg1.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg3.cols(); + else if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg2.cols(); + else + return m_arg1.cols(); + } + + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg1Nested& arg1() const { return m_arg1; } + /** \returns the second argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg2Nested& arg2() const { return m_arg2; } + /** \returns the third argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg3Nested& arg3() const { return m_arg3; } + /** \returns the functor representing the ternary operation */ + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + protected: + Arg1Nested m_arg1; + Arg2Nested m_arg2; + Arg3Nested m_arg3; + const TernaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseTernaryOpImpl + : public internal::generic_xpr_base< + CwiseTernaryOp >::type { + public: + typedef typename internal::generic_xpr_base< + CwiseTernaryOp >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/thirdparty/eigen/Eigen/src/Core/CwiseUnaryOp.h b/thirdparty/eigen/Eigen/src/Core/CwiseUnaryOp.h new file mode 100644 index 000000000..1d2dd19f2 
--- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CwiseUnaryOp.h @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_OP_H +#define EIGEN_CWISE_UNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename result_of< + UnaryOp(const typename XprType::Scalar&) + >::type Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + enum { + Flags = _XprTypeNested::Flags & RowMajorBit + }; +}; +} + +template +class CwiseUnaryOpImpl; + +/** \class CwiseUnaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise unary operator is applied to an expression + * + * \tparam UnaryOp template functor implementing the operator + * \tparam XprType the type of the expression to which we are applying the unary operator + * + * This class represents an expression where a unary operator is applied to an expression. + * It is the return type of all operations taking exactly 1 input expression, regardless of the + * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix + * is considered unary, because only the right-hand side is an expression, and its + * return type is a specialization of CwiseUnaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseUnaryOp types explicitly. 
+ * + * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp + */ +template +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, internal::no_assignment_operator +{ + public: + + typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type NestedExpression; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index cols() const { return m_xpr.cols(); } + + /** \returns the functor representing the unary operation */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::remove_all::type& + nestedExpression() { return m_xpr; } + + protected: + XprTypeNested m_xpr; + const UnaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseUnaryOpImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_OP_H diff --git a/thirdparty/eigen/Eigen/src/Core/CwiseUnaryView.h b/thirdparty/eigen/Eigen/src/Core/CwiseUnaryView.h new file mode 100644 index 000000000..271033056 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/CwiseUnaryView.h @@ -0,0 +1,128 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_VIEW_H +#define EIGEN_CWISE_UNARY_VIEW_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename result_of< + ViewOp(const typename traits::Scalar&) + >::type Scalar; + typedef typename MatrixType::Nested MatrixTypeNested; + typedef typename remove_all::type _MatrixTypeNested; + enum { + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions + MatrixTypeInnerStride = inner_stride_at_compile_time::ret, + // need to cast the sizeof's from size_t to int explicitly, otherwise: + // "error: no integral type can represent all of the enumerator values + InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic + ? int(Dynamic) + : int(MatrixTypeInnerStride) * int(sizeof(typename traits::Scalar) / sizeof(Scalar)), + OuterStrideAtCompileTime = outer_stride_at_compile_time::ret == Dynamic + ? int(Dynamic) + : outer_stride_at_compile_time::ret * int(sizeof(typename traits::Scalar) / sizeof(Scalar)) + }; +}; +} + +template +class CwiseUnaryViewImpl; + +/** \class CwiseUnaryView + * \ingroup Core_Module + * + * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector + * + * \tparam ViewOp template functor implementing the view + * \tparam MatrixType the type of the matrix we are applying the unary operator + * + * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. + * It is the return type of real() and imag(), and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp + */ +template +class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind> +{ + public: + + typedef typename CwiseUnaryViewImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + typedef typename internal::remove_all::type NestedExpression; + + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) + : m_matrix(mat), m_functor(func) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) + + EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } + EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } + + /** \returns the functor representing unary operation */ + const ViewOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + const typename internal::remove_all::type& + nestedExpression() const { return m_matrix; } + + /** \returns the nested expression */ + typename internal::remove_reference::type& + nestedExpression() { return m_matrix.const_cast_derived(); } + + protected: + MatrixTypeNested m_matrix; + ViewOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseUnaryViewImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +template +class CwiseUnaryViewImpl + : public internal::dense_xpr_base< CwiseUnaryView >::type +{ + public: + + typedef CwiseUnaryView Derived; + typedef typename internal::dense_xpr_base< CwiseUnaryView >::type Base; + + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } + + EIGEN_DEVICE_FUNC inline Index innerStride() const + { + return derived().nestedExpression().innerStride() * 
sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + } + + EIGEN_DEVICE_FUNC inline Index outerStride() const + { + return derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_VIEW_H diff --git a/thirdparty/eigen/Eigen/src/Core/DenseBase.h b/thirdparty/eigen/Eigen/src/Core/DenseBase.h new file mode 100644 index 000000000..bd74e8a13 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/DenseBase.h @@ -0,0 +1,601 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSEBASE_H +#define EIGEN_DENSEBASE_H + +namespace Eigen { + +namespace internal { + +// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type. +// This dummy function simply aims at checking that at compile time. +static inline void check_DenseIndex_is_signed() { + EIGEN_STATIC_ASSERT(NumTraits::IsSigned,THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE); +} + +} // end namespace internal + +/** \class DenseBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and arrays + * + * This class is the base that is inherited by all dense objects (matrix, vector, arrays, + * and related expression types). The common Eigen API for dense objects is contained in this class. + * + * \tparam Derived is the derived type, e.g., a matrix type or an expression. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. 
+ * + * \sa \blank \ref TopicClassHierarchy + */ +template class DenseBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public DenseCoeffsBase +#else + : public DenseCoeffsBase +#endif // not EIGEN_PARSED_BY_DOXYGEN +{ + public: + + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; + + typedef typename internal::traits::StorageKind StorageKind; + + /** + * \brief The type used to store indices + * \details This typedef is relevant for types that store multiple indices such as + * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index + * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + */ + typedef typename internal::traits::StorageIndex StorageIndex; + + /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex, etc. */ + typedef typename internal::traits::Scalar Scalar; + + /** The numeric type of the expression's coefficients, e.g. float, double, int or std::complex, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + + typedef typename NumTraits::Real RealScalar; + typedef DenseCoeffsBase Base; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::rowIndexByOuterInner; + using Base::colIndexByOuterInner; + using Base::coeff; + using Base::coeffByOuterInner; + using Base::operator(); + using Base::operator[]; + using Base::x; + using Base::y; + using Base::z; + using Base::w; + using Base::stride; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + /**< The number of rows at compile-time. This is just a copy of the value provided + * by the \a Derived type. 
If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ + + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + /**< The number of columns at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ + + + SizeAtCompileTime = (internal::size_at_compile_time::RowsAtCompileTime, + internal::traits::ColsAtCompileTime>::ret), + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */ + + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + /**< This value is equal to the maximum possible number of rows that this expression + * might have. If this expression might have an arbitrarily high number of rows, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + /**< This value is equal to the maximum possible number of columns that this expression + * might have. If this expression might have an arbitrarily high number of columns, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. 
+ * + * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxSizeAtCompileTime = (internal::size_at_compile_time::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime>::ret), + /**< This value is equal to the maximum possible number of coefficients that this expression + * might have. If this expression might have an arbitrarily high number of coefficients, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime + */ + + IsVectorAtCompileTime = internal::traits::MaxRowsAtCompileTime == 1 + || internal::traits::MaxColsAtCompileTime == 1, + /**< This is set to true if either the number of rows or the number of + * columns is known at compile-time to be equal to 1. Indeed, in that case, + * we are dealing with a column-vector (if there is only one column) or with + * a row-vector (if there is only one row). */ + + Flags = internal::traits::Flags, + /**< This stores expression \ref flags flags which may or may not be inherited by new expressions + * constructed from this one. See the \ref flags "list of flags". + */ + + IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */ + + InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) + : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + + InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, + OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret + }; + + typedef typename internal::find_best_packet::type PacketScalar; + + enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. 
+ * \sa PlainObject */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef typename internal::conditional::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray>::type PlainObject; + + /** \returns the number of nonzero coefficients which is in practice the number + * of stored coefficients. */ + EIGEN_DEVICE_FUNC + inline Index nonZeros() const { return size(); } + + /** \returns the outer size. + * + * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a + * column-major matrix, and the number of rows for a row-major matrix. */ + EIGEN_DEVICE_FUNC + Index outerSize() const + { + return IsVectorAtCompileTime ? 1 + : int(IsRowMajor) ? this->rows() : this->cols(); + } + + /** \returns the inner size. + * + * \note For a vector, this is just the size. 
For a matrix (non-vector), this is the minor dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a + * column-major matrix, and the number of columns for a row-major matrix. */ + EIGEN_DEVICE_FUNC + Index innerSize() const + { + return IsVectorAtCompileTime ? this->size() + : int(IsRowMajor) ? this->cols() : this->rows(); + } + + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + EIGEN_DEVICE_FUNC + void resize(Index newSize) + { + EIGEN_ONLY_USED_FOR_DEBUG(newSize); + eigen_assert(newSize == this->size() + && "DenseBase::resize() does not actually allow to resize."); + } + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does + * nothing else. + */ + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) + { + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() + && "DenseBase::resize() does not actually allow to resize."); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; + /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */ + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows random access. 
*/ + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; + /** \internal the return type of MatrixBase::eigenvalues() */ + typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** Copies \a other into *this. \returns a reference to *this. */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const DenseBase& other); + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const DenseBase& other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const EigenBase &other); + + template + EIGEN_DEVICE_FUNC + Derived& operator+=(const EigenBase &other); + + template + EIGEN_DEVICE_FUNC + Derived& operator-=(const EigenBase &other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const ReturnByValue& func); + + /** \internal + * Copies \a other into *this without evaluating other. \returns a reference to *this. 
+ * \deprecated */ + template + EIGEN_DEVICE_FUNC + Derived& lazyAssign(const DenseBase& other); + + EIGEN_DEVICE_FUNC + CommaInitializer operator<< (const Scalar& s); + + /** \deprecated it now returns \c *this */ + template + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } + + template + EIGEN_DEVICE_FUNC + CommaInitializer operator<< (const DenseBase& other); + + typedef Transpose TransposeReturnType; + EIGEN_DEVICE_FUNC + TransposeReturnType transpose(); + typedef typename internal::add_const >::type ConstTransposeReturnType; + EIGEN_DEVICE_FUNC + ConstTransposeReturnType transpose() const; + EIGEN_DEVICE_FUNC + void transposeInPlace(); + + EIGEN_DEVICE_FUNC static const ConstantReturnType + Constant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC static const ConstantReturnType + Constant(Index size, const Scalar& value); + EIGEN_DEVICE_FUNC static const ConstantReturnType + Constant(const Scalar& value); + + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType + LinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType + LinSpaced(const Scalar& low, const Scalar& high); + + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp + NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp + NullaryExpr(Index size, const CustomNullaryOp& func); + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp + NullaryExpr(const CustomNullaryOp& func); + + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); + 
EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); + + EIGEN_DEVICE_FUNC void fill(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setZero(); + EIGEN_DEVICE_FUNC Derived& setOnes(); + EIGEN_DEVICE_FUNC Derived& setRandom(); + + template EIGEN_DEVICE_FUNC + bool isApprox(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC + bool isMuchSmallerThan(const RealScalar& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + template EIGEN_DEVICE_FUNC + bool isMuchSmallerThan(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + + EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + + inline bool hasNaN() const; + inline bool allFinite() const; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const Scalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const Scalar& other); + + typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; + /** \returns the matrix or vector obtained by evaluating this expression. 
+ * + * Notice that in the case of a plain matrix or vector (not an expression) this function just returns + * a const reference, in order to avoid a useless copy. + * + * \warning Be careful with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE EvalReturnType eval() const + { + // Even though MSVC does not honor strong inlining when the return type + // is a dynamic matrix, we desperately need strong inlining for fixed + // size types on MSVC. + return typename internal::eval::type(derived()); + } + + /** swaps *this with the expression \a other. + * + */ + template + EIGEN_DEVICE_FUNC + void swap(const DenseBase& other) + { + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); + } + + /** swaps *this with the matrix or array \a other. 
+ * + */ + template + EIGEN_DEVICE_FUNC + void swap(PlainObjectBase& other) + { + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); + } + + EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; + EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); + template EIGEN_DEVICE_FUNC + inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + template EIGEN_DEVICE_FUNC + inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + + EIGEN_DEVICE_FUNC Scalar sum() const; + EIGEN_DEVICE_FUNC Scalar mean() const; + EIGEN_DEVICE_FUNC Scalar trace() const; + + EIGEN_DEVICE_FUNC Scalar prod() const; + + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; + + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar minCoeff(IndexType* index) const; + template EIGEN_DEVICE_FUNC + typename internal::traits::Scalar maxCoeff(IndexType* index) const; + + template + EIGEN_DEVICE_FUNC + Scalar redux(const BinaryOp& func) const; + + template + EIGEN_DEVICE_FUNC + void visit(Visitor& func) const; + + inline const WithFormat format(const IOFormat& fmt) const; + + /** \returns the unique coefficient of a 1x1 expression */ + EIGEN_DEVICE_FUNC + CoeffReturnType value() const + { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) + eigen_assert(this->rows() == 1 && this->cols() == 1); + return derived().coeff(0,0); + } + + bool all() const; + bool any() const; + Index count() const; + + typedef VectorwiseOp RowwiseReturnType; + typedef const 
VectorwiseOp ConstRowwiseReturnType; + typedef VectorwiseOp ColwiseReturnType; + typedef const VectorwiseOp ConstColwiseReturnType; + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { + return ConstRowwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { + return ConstColwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); + + typedef CwiseNullaryOp,PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); + + template + const Select + select(const DenseBase& thenMatrix, + const DenseBase& elseMatrix) const; + + template + inline const Select + select(const DenseBase& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const; + + template + inline const Select + select(const typename ElseDerived::Scalar& thenScalar, const DenseBase& elseMatrix) const; + + template RealScalar lpNorm() const; + + template + EIGEN_DEVICE_FUNC + const Replicate replicate() const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), 
DenseBase::replicate(), class Replicate + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC + const Replicate replicate(Index rowFactor, Index colFactor) const + { + return Replicate(derived(), rowFactor, colFactor); + } + + typedef Reverse ReverseReturnType; + typedef const Reverse ConstReverseReturnType; + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const + { + return ConstReverseReturnType(derived()); + } + EIGEN_DEVICE_FUNC void reverseInPlace(); + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) +# include "../plugins/BlockMethods.h" +# ifdef EIGEN_DENSEBASE_PLUGIN +# include EIGEN_DENSEBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF + + // disable the use of evalTo for dense objects with a nice compilation error + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& ) const + { + EIGEN_STATIC_ASSERT((internal::is_same::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); + } + + protected: + /** Default constructor. Do nothing. */ + EIGEN_DEVICE_FUNC DenseBase() + { + /* Just checks for self-consistency of the flags. 
+ * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down + */ +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, int(IsRowMajor)) + && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, int(!IsRowMajor))), + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) +#endif + } + + private: + EIGEN_DEVICE_FUNC explicit DenseBase(int); + EIGEN_DEVICE_FUNC DenseBase(int,int); + template EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); +}; + +} // end namespace Eigen + +#endif // EIGEN_DENSEBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/DenseCoeffsBase.h b/thirdparty/eigen/Eigen/src/Core/DenseCoeffsBase.h new file mode 100644 index 000000000..c4af48ab6 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/DenseCoeffsBase.h @@ -0,0 +1,681 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSECOEFFSBASE_H +#define EIGEN_DENSECOEFFSBASE_H + +namespace Eigen { + +namespace internal { +template struct add_const_on_value_type_if_arithmetic +{ + typedef typename conditional::value, T, typename add_const_on_value_type::type>::type type; +}; +} + +/** \brief Base class providing read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #ReadOnlyAccessors Constant indicating read-only access + * + * This class defines the \c operator() \c const function and friends, which can be used to read specific + * entries of a matrix or array. 
+ * + * \sa DenseCoeffsBase, DenseCoeffsBase, + * \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public EigenBase +{ + public: + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + + // Explanation for this CoeffReturnType typedef. + // - This is the return type of the coeff() method. + // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references + // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value). + // - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems + // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is + // not possible, since the underlying expressions might not offer a valid address the reference could be referring to. + typedef typename internal::conditional::Flags&LvalueBit), + const Scalar&, + typename internal::conditional::value, Scalar, const Scalar>::type + >::type CoeffReturnType; + + typedef typename internal::add_const_on_value_type_if_arithmetic< + typename internal::packet_traits::type + >::type PacketReturnType; + + typedef EigenBase Base; + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const + { + return int(Derived::RowsAtCompileTime) == 1 ? 0 + : int(Derived::ColsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const + { + return int(Derived::ColsAtCompileTime) == 1 ? 0 + : int(Derived::RowsAtCompileTime) == 1 ? inner + : int(Derived::Flags)&RowMajorBit ? 
inner + : outer; + } + + /** Short version: don't use this function, use + * \link operator()(Index,Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) const \endlink. + * + * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { + return coeff(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \returns the coefficient at the given row and column. + * + * \sa operator()(Index,Index), operator[](Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const + { + eigen_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return coeff(row, col); + } + + /** Short version: don't use this function, use + * \link operator[](Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameter \a index is in range. 
+ * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) const \endlink. + * + * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + coeff(Index index) const + { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).coeff(index); + } + + + /** \returns the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + operator[](Index index) const + { + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + eigen_assert(index >= 0 && index < size()); + return coeff(index); + } + + /** \returns the coefficient at given index. + * + * This is synonymous to operator[](Index) const. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + operator()(Index index) const + { + eigen_assert(index >= 0 && index < size()); + return coeff(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + x() const { return (*this)[0]; } + + /** equivalent to operator[](1). 
*/ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + y() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } + + /** equivalent to operator[](2). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + z() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } + + /** equivalent to operator[](3). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CoeffReturnType + w() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } + + /** \internal + * \returns the packet of coefficients starting at the given row and column. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const + { + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).template packet(row,col); + } + + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return packet(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \internal + * \returns the packet of coefficients starting at the given index. It is your responsibility + * to ensure that a packet really starts there. 
This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).template packet(index); + } + + protected: + // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase. + // But some methods are only available in the DirectAccess case. + // So we add dummy methods here with these names, so that "using... " doesn't fail. + // It's not private so that the child class DenseBase can access them, and it's not public + // either since it's an implementation detail, so has to be protected. + void coeffRef(); + void coeffRefByOuterInner(); + void writePacket(); + void writePacketByOuterInner(); + void copyCoeff(); + void copyCoeffByOuterInner(); + void copyPacket(); + void copyPacketByOuterInner(); + void stride(); + void innerStride(); + void outerStride(); + void rowStride(); + void colStride(); +}; + +/** \brief Base class providing read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #WriteAccessors Constant indicating read/write access + * + * This class defines the non-const \c operator() function and friends, which can be used to write specific + * entries of a matrix or array. 
This class inherits DenseCoeffsBase which + * defines the const variant for reading specific entries. + * + * \sa DenseCoeffsBase, \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + using Base::coeff; + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + using Base::rowIndexByOuterInner; + using Base::colIndexByOuterInner; + using Base::operator[]; + using Base::operator(); + using Base::x; + using Base::y; + using Base::z; + using Base::w; + + /** Short version: don't use this function, use + * \link operator()(Index,Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) \endlink. + * + * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) + { + eigen_internal_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeffRef(row,col); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + coeffRefByOuterInner(Index outer, Index inner) + { + return coeffRef(rowIndexByOuterInner(outer, inner), + colIndexByOuterInner(outer, inner)); + } + + /** \returns a reference to the coefficient at given the given row and column. 
+ * + * \sa operator[](Index) + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + operator()(Index row, Index col) + { + eigen_assert(row >= 0 && row < rows() + && col >= 0 && col < cols()); + return coeffRef(row, col); + } + + + /** Short version: don't use this function, use + * \link operator[](Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) \endlink. + * + * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index) + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + coeffRef(Index index) + { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + operator[](Index index) + { + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This is synonymous to operator[](Index). 
+ * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + x() { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + y() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } + + /** equivalent to operator[](2). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + z() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } + + /** equivalent to operator[](3). */ + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + w() + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } +}; + +/** \brief Base class providing direct read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #DirectAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read-only using + * \c operator() . 
+ * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return derived().outerStride(); + } + + // FIXME shall we remove it ? + inline Index stride() const + { + return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index rowStride() const + { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + EIGEN_DEVICE_FUNC + inline Index colStride() const + { + return Derived::IsRowMajor ? innerStride() : outerStride(); + } +}; + +/** \brief Base class providing direct read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * \tparam #DirectWriteAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. 
This class + * inherits DenseCoeffsBase which defines functions to access entries read/write using + * \c operator(). + * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase + : public DenseCoeffsBase +{ + public: + + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::derived; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return derived().innerStride(); + } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return derived().outerStride(); + } + + // FIXME shall we remove it ? + inline Index stride() const + { + return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); + } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + EIGEN_DEVICE_FUNC + inline Index rowStride() const + { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + EIGEN_DEVICE_FUNC + inline Index colStride() const + { + return Derived::IsRowMajor ? 
innerStride() : outerStride(); + } +}; + +namespace internal { + +template +struct first_aligned_impl +{ + static inline Index run(const Derived&) + { return 0; } +}; + +template +struct first_aligned_impl +{ + static inline Index run(const Derived& m) + { + return internal::first_aligned(m.data(), m.size()); + } +}; + +/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization. + * + * \tparam Alignment requested alignment in Bytes. + * + * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more + * documentation. + */ +template +static inline Index first_aligned(const DenseBase& m) +{ + enum { ReturnZero = (int(evaluator::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) }; + return first_aligned_impl::run(m.derived()); +} + +template +static inline Index first_default_aligned(const DenseBase& m) +{ + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type DefaultPacketType; + return internal::first_aligned::alignment),Derived>(m); +} + +template::ret> +struct inner_stride_at_compile_time +{ + enum { ret = traits::InnerStrideAtCompileTime }; +}; + +template +struct inner_stride_at_compile_time +{ + enum { ret = 0 }; +}; + +template::ret> +struct outer_stride_at_compile_time +{ + enum { ret = traits::OuterStrideAtCompileTime }; +}; + +template +struct outer_stride_at_compile_time +{ + enum { ret = 0 }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSECOEFFSBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/DenseStorage.h b/thirdparty/eigen/Eigen/src/Core/DenseStorage.h new file mode 100644 index 000000000..82201d96a --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/DenseStorage.h @@ -0,0 +1,563 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2010-2013 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIXSTORAGE_H +#define EIGEN_MATRIXSTORAGE_H + +#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN; +#else + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN +#endif + +namespace Eigen { + +namespace internal { + +struct constructor_without_unaligned_array_assert {}; + +template +EIGEN_DEVICE_FUNC +void check_static_allocation_size() +{ + // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit + #if EIGEN_STACK_ALLOCATION_LIMIT + EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); + #endif +} + +/** \internal + * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: + * to 16 bytes boundary if the total size is a multiple of 16 bytes. + */ +template ::value > +struct plain_array +{ + T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) +#elif EIGEN_GNUC_AT_LEAST(4,7) + // GCC 4.7 is too aggressive in its optimizations and remove the alignement test based on the fact the array is declared to be aligned. 
+ // See this bug report: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900 + // Hiding the origin of the array pointer behind a function argument seems to do the trick even if the function is inlined: + template + EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ + eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ + && "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#else + #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ + eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \ + && "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#endif + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T 
array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + T array[1]; + EIGEN_DEVICE_FUNC plain_array() {} + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} +}; + +} // end namespace internal + +/** \internal + * + * \class DenseStorage + * \ingroup Core_Module + * + * \brief Stores the data of a matrix + * + * This class stores the data of fixed-size, dynamic-size or mixed matrices + * in a way as compact as possible. + * + * \sa Matrix + */ +template class DenseStorage; + +// purely fixed-size matrix +template class DenseStorage +{ + internal::plain_array m_data; + public: + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()) {} + EIGEN_DEVICE_FUNC + DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + EIGEN_DEVICE_FUNC + DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) m_data = other.m_data; + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); + EIGEN_UNUSED_VARIABLE(size); + EIGEN_UNUSED_VARIABLE(rows); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { 
return m_data.array; } +}; + +// null matrix +template class DenseStorage +{ + public: + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } + EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {} + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return 0; } + EIGEN_DEVICE_FUNC T *data() { return 0; } +}; + +// more specializations for null matrices; these are necessary to resolve ambiguities +template class DenseStorage +: public DenseStorage { }; + +template class DenseStorage +: public DenseStorage { }; + +template class DenseStorage +: public DenseStorage { }; + +// dynamic-size matrix with fixed-size storage +template class DenseStorage +{ + internal::plain_array m_data; + Index m_rows; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) + { std::swap(m_data,other.m_data); 
std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows() const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols() const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// dynamic-size matrix with fixed-size storage and fixed width +template class DenseStorage +{ + internal::plain_array m_data; + Index m_rows; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_rows = other.m_rows; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// dynamic-size matrix with fixed-size storage and fixed height +template class DenseStorage +{ + internal::plain_array m_data; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} + 
EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + m_data = other.m_data; + m_cols = other.m_cols; + } + return *this; + } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index, Index, Index cols) { m_cols = cols; } + void resize(Index, Index, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } +}; + +// purely dynamic matrix. 
+template class DenseStorage +{ + T *m_data; + Index m_rows; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) + : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) + , m_rows(other.m_rows) + , m_cols(other.m_cols) + { + internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_rows = 0; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + swap(m_cols, other.m_cols); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) + { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index size, Index rows, Index cols) + { + m_data = 
internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) + { + if(size != m_rows*m_cols) + { + internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + } + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } +}; + +// matrix with dynamic width and fixed height (so that matrix has dynamic size). +template class DenseStorage +{ + T *m_data; + Index m_cols; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); + EIGEN_UNUSED_VARIABLE(rows); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) + , m_cols(other.m_cols) + { + internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT + : m_data(std::move(other.m_data)) + , m_cols(std::move(other.m_cols)) + { + other.m_data = nullptr; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_data, other.m_data); + swap(m_cols, other.m_cols); + return 
*this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) + { + if(size != _Rows*m_cols) + { + internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + } + m_cols = cols; + } + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } +}; + +// matrix with dynamic height and fixed width (so that matrix has dynamic size). 
+template class DenseStorage +{ + T *m_data; + Index m_rows; + public: + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) + , m_rows(other.m_rows) + { + internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT + : m_data(std::move(other.m_data)) + , m_rows(std::move(other.m_rows)) + { + other.m_data = nullptr; + other.m_rows = 0; + } + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_data, other.m_data); + swap(m_rows, other.m_rows); + return *this; + } +#endif + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + void conservativeResize(Index size, Index rows, Index) + { + m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) + { + if(size != m_rows*_Cols) + { + 
internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); + if (size) + m_data = internal::conditional_aligned_new_auto(size); + else + m_data = 0; + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + } + m_rows = rows; + } + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } +}; + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git a/thirdparty/eigen/Eigen/src/Core/Diagonal.h b/thirdparty/eigen/Eigen/src/Core/Diagonal.h new file mode 100644 index 000000000..bfea0584b --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Diagonal.h @@ -0,0 +1,257 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2009 Benoit Jacob +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONAL_H +#define EIGEN_DIAGONAL_H + +namespace Eigen { + +/** \class Diagonal + * \ingroup Core_Module + * + * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix + * + * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal + * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. + * A positive value means a superdiagonal, a negative value means a subdiagonal. + * You can also use Dynamic so the index can be set at runtime. + * + * The matrix is not required to be square. + * + * This class represents an expression of the main diagonal, or any sub/super diagonal + * of a matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the + * time this is the only way it is used. 
+ * + * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index) + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename ref_selector::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + typedef typename MatrixType::StorageKind StorageKind; + enum { + RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) ? Dynamic + : (EIGEN_PLAIN_ENUM_MIN(MatrixType::RowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), + MatrixType::ColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + ColsAtCompileTime = 1, + MaxRowsAtCompileTime = int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic + : DiagIndex == DynamicIndex ? EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::MaxRowsAtCompileTime, + MatrixType::MaxColsAtCompileTime) + : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), + MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), + MaxColsAtCompileTime = 1, + MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions + MatrixTypeOuterStride = outer_stride_at_compile_time::ret, + InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, + OuterStrideAtCompileTime = 0 + }; +}; +} + +template class Diagonal + : public internal::dense_xpr_base< Diagonal >::type +{ + public: + + enum { DiagIndex = _DiagIndex }; + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) + + EIGEN_DEVICE_FUNC + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + + EIGEN_DEVICE_FUNC + inline Index rows() const + { + return m_index.value()<0 ? 
numext::mini(m_matrix.cols(),m_matrix.rows()+m_index.value()) + : numext::mini(m_matrix.rows(),m_matrix.cols()-m_index.value()); + } + + EIGEN_DEVICE_FUNC + inline Index cols() const { return 1; } + + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return m_matrix.outerStride() + 1; + } + + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return 0; + } + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index row, Index) + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index row, Index) const + { + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline CoeffReturnType coeff(Index row, Index) const + { + return m_matrix.coeff(row+rowOffset(), row+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline Scalar& coeffRef(Index idx) + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index idx) const + { + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline CoeffReturnType coeff(Index idx) const + { + return m_matrix.coeff(idx+rowOffset(), idx+colOffset()); + } + + EIGEN_DEVICE_FUNC + inline const typename internal::remove_all::type& + nestedExpression() const + { + return m_matrix; + } + + EIGEN_DEVICE_FUNC + inline Index index() const + { + return m_index.value(); + } + + protected: + typename internal::ref_selector::non_const_type m_matrix; + const 
internal::variable_if_dynamicindex m_index; + + private: + // some compilers may fail to optimize std::max etc in case of compile-time constants... + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } + // trigger a compile-time error if someone tries to call packet + template typename MatrixType::PacketReturnType packet(Index) const; + template typename MatrixType::PacketReturnType packet(Index,Index) const; +}; + +/** \returns an expression of the main diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * Example: \include MatrixBase_diagonal.cpp + * Output: \verbinclude MatrixBase_diagonal.out + * + * \sa class Diagonal */ +template +inline typename MatrixBase::DiagonalReturnType +MatrixBase::diagonal() +{ + return DiagonalReturnType(derived()); +} + +/** This is the const version of diagonal(). */ +template +inline typename MatrixBase::ConstDiagonalReturnType +MatrixBase::diagonal() const +{ + return ConstDiagonalReturnType(derived()); +} + +/** \returns an expression of the \a index-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The run-time parameter \a index represents a super diagonal if \a index > 0 + * and a sub diagonal otherwise. \a index == 0 is equivalent to the main diagonal. 
+ * + * Example: \include MatrixBase_diagonal_int.cpp + * Output: \verbinclude MatrixBase_diagonal_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +inline typename MatrixBase::DiagonalDynamicIndexReturnType +MatrixBase::diagonal(Index index) +{ + return DiagonalDynamicIndexReturnType(derived(), index); +} + +/** This is the const version of diagonal(Index). */ +template +inline typename MatrixBase::ConstDiagonalDynamicIndexReturnType +MatrixBase::diagonal(Index index) const +{ + return ConstDiagonalDynamicIndexReturnType(derived(), index); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represents a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_template_int.cpp + * Output: \verbinclude MatrixBase_diagonal_template_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type +MatrixBase::diagonal() +{ + return typename DiagonalIndexReturnType::Type(derived()); +} + +/** This is the const version of the templated diagonal() overload above. */ +template +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +MatrixBase::diagonal() const +{ + return typename ConstDiagonalIndexReturnType::Type(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONAL_H diff --git a/thirdparty/eigen/Eigen/src/Core/DiagonalMatrix.h b/thirdparty/eigen/Eigen/src/Core/DiagonalMatrix.h new file mode 100644 index 000000000..ecfdce8ef --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -0,0 +1,343 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALMATRIX_H +#define EIGEN_DIAGONALMATRIX_H + +namespace Eigen { + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +class DiagonalBase : public EigenBase +{ + public: + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::RealScalar RealScalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + IsVectorAtCompileTime = 0, + Flags = NoPreferredStorageOrderBit + }; + + typedef Matrix DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef DiagonalMatrix PlainObject; + + EIGEN_DEVICE_FUNC + inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC + inline Derived& derived() { return *static_cast(this); } + + EIGEN_DEVICE_FUNC + DenseMatrixType toDenseMatrix() const { return derived(); } + + EIGEN_DEVICE_FUNC + inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + EIGEN_DEVICE_FUNC + inline DiagonalVectorType& diagonal() { return derived().diagonal(); } + + EIGEN_DEVICE_FUNC + inline Index rows() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return diagonal().size(); } + + template + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &matrix) const + { + return Product(derived(),matrix.derived()); 
+ } + + typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; + EIGEN_DEVICE_FUNC + inline const InverseReturnType + inverse() const + { + return InverseReturnType(diagonal().cwiseInverse()); + } + + EIGEN_DEVICE_FUNC + inline const DiagonalWrapper + operator*(const Scalar& scalar) const + { + return DiagonalWrapper(diagonal() * scalar); + } + EIGEN_DEVICE_FUNC + friend inline const DiagonalWrapper + operator*(const Scalar& scalar, const DiagonalBase& other) + { + return DiagonalWrapper(scalar * other.diagonal()); + } +}; + +#endif + +/** \class DiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a diagonal matrix with its storage + * + * \param _Scalar the type of coefficients + * \param SizeAtCompileTime the dimension of the matrix, or Dynamic + * \param MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults + * to SizeAtCompileTime. Most of the time, you do not need to specify it. + * + * \sa class DiagonalWrapper + */ + +namespace internal { +template +struct traits > + : traits > +{ + typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; + typedef DiagonalShape StorageKind; + enum { + Flags = LvalueBit | NoPreferredStorageOrderBit + }; +}; +} +template +class DiagonalMatrix + : public DiagonalBase > +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef const DiagonalMatrix& Nested; + typedef _Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + #endif + + protected: + + DiagonalVectorType m_diagonal; + + public: + + /** const version of diagonal(). */ + EIGEN_DEVICE_FUNC + inline const DiagonalVectorType& diagonal() const { return m_diagonal; } + /** \returns a reference to the stored vector of diagonal coefficients. 
*/ + EIGEN_DEVICE_FUNC + inline DiagonalVectorType& diagonal() { return m_diagonal; } + + /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC + inline DiagonalMatrix() {} + + /** Constructs a diagonal matrix with given dimension */ + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + + /** 2D constructor. */ + EIGEN_DEVICE_FUNC + inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {} + + /** 3D constructor. */ + EIGEN_DEVICE_FUNC + inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} + + /** Copy constructor from any diagonal expression. */ + template + EIGEN_DEVICE_FUNC + inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ + inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {} + #endif + + /** generic constructor from expression of the diagonal coefficients */ + template + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) + {} + + /** Assignment from any diagonal expression. */ + template + EIGEN_DEVICE_FUNC + DiagonalMatrix& operator=(const DiagonalBase& other) + { + m_diagonal = other.diagonal(); + return *this; + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC + DiagonalMatrix& operator=(const DiagonalMatrix& other) + { + m_diagonal = other.diagonal(); + return *this; + } + #endif + + /** Resizes to given size. */ + EIGEN_DEVICE_FUNC + inline void resize(Index size) { m_diagonal.resize(size); } + /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC + inline void setZero() { m_diagonal.setZero(); } + /** Resizes and sets all coefficients to zero. 
*/ + EIGEN_DEVICE_FUNC + inline void setZero(Index size) { m_diagonal.setZero(size); } + /** Sets this matrix to be the identity matrix of the current size. */ + EIGEN_DEVICE_FUNC + inline void setIdentity() { m_diagonal.setOnes(); } + /** Sets this matrix to be the identity matrix of the given size. */ + EIGEN_DEVICE_FUNC + inline void setIdentity(Index size) { m_diagonal.setOnes(size); } +}; + +/** \class DiagonalWrapper + * \ingroup Core_Module + * + * \brief Expression of a diagonal matrix + * + * \param _DiagonalVectorType the type of the vector of diagonal coefficients + * + * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients, + * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal() + * and most of the time this is the only way that it is used. + * + * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal() + */ + +namespace internal { +template +struct traits > +{ + typedef _DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::StorageIndex StorageIndex; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit + }; +}; +} + +template +class DiagonalWrapper + : public DiagonalBase >, internal::no_assignment_operator +{ + public: + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef _DiagonalVectorType DiagonalVectorType; + typedef DiagonalWrapper Nested; + #endif + + /** Constructor from expression of diagonal coefficients to wrap. 
*/ + EIGEN_DEVICE_FUNC + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + + /** \returns a const reference to the wrapped expression of diagonal coefficients. */ + EIGEN_DEVICE_FUNC + const DiagonalVectorType& diagonal() const { return m_diagonal; } + + protected: + typename DiagonalVectorType::Nested m_diagonal; +}; + +/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients + * + * \only_for_vectors + * + * Example: \include MatrixBase_asDiagonal.cpp + * Output: \verbinclude MatrixBase_asDiagonal.out + * + * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() + **/ +template +inline const DiagonalWrapper +MatrixBase::asDiagonal() const +{ + return DiagonalWrapper(derived()); +} + +/** \returns true if *this is approximately equal to a diagonal matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isDiagonal.cpp + * Output: \verbinclude MatrixBase_isDiagonal.out + * + * \sa asDiagonal() + */ +template +bool MatrixBase::isDiagonal(const RealScalar& prec) const +{ + if(cols() != rows()) return false; + RealScalar maxAbsOnDiagonal = static_cast(-1); + for(Index j = 0; j < cols(); ++j) + { + RealScalar absOnDiagonal = numext::abs(coeff(j,j)); + if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; + } + for(Index j = 0; j < cols(); ++j) + for(Index i = 0; i < j; ++i) + { + if(!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false; + if(!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false; + } + return true; +} + +namespace internal { + +template<> struct storage_kind_to_shape { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment +{ + static void 
run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALMATRIX_H diff --git a/thirdparty/eigen/Eigen/src/Core/DiagonalProduct.h b/thirdparty/eigen/Eigen/src/Core/DiagonalProduct.h new file mode 100644 index 000000000..d372b938f --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/DiagonalProduct.h @@ -0,0 +1,28 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALPRODUCT_H +#define EIGEN_DIAGONALPRODUCT_H + +namespace Eigen { + +/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a a_diagonal. 
+ */ +template +template +inline const Product +MatrixBase::operator*(const DiagonalBase &a_diagonal) const +{ + return Product(derived(),a_diagonal.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALPRODUCT_H diff --git a/thirdparty/eigen/Eigen/src/Core/Dot.h b/thirdparty/eigen/Eigen/src/Core/Dot.h new file mode 100644 index 000000000..1d7f2262e --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Dot.h @@ -0,0 +1,312 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008, 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DOT_H +#define EIGEN_DOT_H + +namespace Eigen { + +namespace internal { + +// helper function for dot(). The problem is that if we put this code directly in the body of dot(), then upon calling dot() +// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE +// looking at the static assertions. Thus this is a trick to get better compile errors. +template +struct dot_nocheck +{ + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; + EIGEN_DEVICE_FUNC + static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + { + return a.template binaryExpr(b).sum(); + } +}; + +template +struct dot_nocheck +{ + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; + EIGEN_DEVICE_FUNC + static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + { + return a.transpose().template binaryExpr(b).sum(); + } +}; + +} // end namespace internal + +/** \returns the dot product of *this with other. 
+ * + * \only_for_vectors + * + * \note If the scalar type is complex numbers, then this function returns the hermitian + * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the + * second variable. + * + * \sa squaredNorm(), norm() + */ +template +template +EIGEN_DEVICE_FUNC +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType +MatrixBase::dot(const MatrixBase& other) const +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) + typedef internal::scalar_conj_product_op func; + EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); + + eigen_assert(size() == other.size()); + + return internal::dot_nocheck::run(*this, other); +} + +//---------- implementation of L2 norm and related functions ---------- + +/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the sum of the square of all the matrix entries. + * For vectors, this is also equal to the dot product of \c *this with itself. + * + * \sa dot(), norm(), lpNorm() + */ +template +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const +{ + return numext::real((*this).cwiseAbs2().sum()); +} + +/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the square root of the sum of the square of all the matrix entries. + * For vectors, this is also equal to the square root of the dot product of \c *this with itself. + * + * \sa lpNorm(), dot(), squaredNorm() + */ +template +inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +{ + return numext::sqrt(squaredNorm()); +} + +/** \returns an expression of the quotient of \c *this by its own norm. 
+ * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. + * + * \only_for_vectors + * + * \sa norm(), normalize() + */ +template +inline const typename MatrixBase::PlainObject +MatrixBase::normalized() const +{ + typedef typename internal::nested_eval::type _Nested; + _Nested n(derived()); + RealScalar z = n.squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPUs + if(z>RealScalar(0)) + return n / numext::sqrt(z); + else + return n; +} + +/** Normalizes the vector, i.e. divides it by its own norm. + * + * \only_for_vectors + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa norm(), normalized() + */ +template +inline void MatrixBase::normalize() +{ + RealScalar z = squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPUs + if(z>RealScalar(0)) + derived() /= numext::sqrt(z); +} + +/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow. + * + * \only_for_vectors + * + * This method is analogous to the normalized() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. 
+ * + * \sa stableNorm(), stableNormalize(), normalized() + */ +template +inline const typename MatrixBase::PlainObject +MatrixBase::stableNormalized() const +{ + typedef typename internal::nested_eval::type _Nested; + _Nested n(derived()); + RealScalar w = n.cwiseAbs().maxCoeff(); + RealScalar z = (n/w).squaredNorm(); + if(z>RealScalar(0)) + return n / (numext::sqrt(z)*w); + else + return n; +} + +/** Normalizes the vector while avoiding underflow and overflow + * + * \only_for_vectors + * + * This method is analogous to the normalize() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa stableNorm(), stableNormalized(), normalize() + */ +template +inline void MatrixBase::stableNormalize() +{ + RealScalar w = cwiseAbs().maxCoeff(); + RealScalar z = (derived()/w).squaredNorm(); + if(z>RealScalar(0)) + derived() /= numext::sqrt(z)*w; +} + +//---------- implementation of other norms ---------- + +namespace internal { + +template +struct lpNorm_selector +{ + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const MatrixBase& m) + { + EIGEN_USING_STD_MATH(pow) + return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); + } +}; + +template +struct lpNorm_selector +{ + EIGEN_DEVICE_FUNC + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.cwiseAbs().sum(); + } +}; + +template +struct lpNorm_selector +{ + EIGEN_DEVICE_FUNC + static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + { + return m.norm(); + } +}; + +template +struct lpNorm_selector +{ + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const MatrixBase& m) + { + if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0)) + return RealScalar(0); + return 
m.cwiseAbs().maxCoeff(); + } +}; + +} // end namespace internal + +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of \c *this. + * + * In all cases, if \c *this is empty, then the value 0 is returned. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. + * + * \sa norm() + */ +template +template +#ifndef EIGEN_PARSED_BY_DOXYGEN +inline typename NumTraits::Scalar>::Real +#else +MatrixBase::RealScalar +#endif +MatrixBase::lpNorm() const +{ + return internal::lpNorm_selector::run(*this); +} + +//---------- implementation of isOrthogonal / isUnitary ---------- + +/** \returns true if *this is approximately orthogonal to \a other, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isOrthogonal.cpp + * Output: \verbinclude MatrixBase_isOrthogonal.out + */ +template +template +bool MatrixBase::isOrthogonal +(const MatrixBase& other, const RealScalar& prec) const +{ + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); + return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); +} + +/** \returns true if *this is approximately a unitary matrix, + * within the precision given by \a prec. In the case where the \a Scalar + * type is real numbers, a unitary matrix is an orthogonal matrix, whence the name. 
+ * + * \note This can be used to check whether a family of vectors forms an orthonormal basis. + * Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an + * orthonormal basis. + * + * Example: \include MatrixBase_isUnitary.cpp + * Output: \verbinclude MatrixBase_isUnitary.out + */ +template +bool MatrixBase::isUnitary(const RealScalar& prec) const +{ + typename internal::nested_eval::type self(derived()); + for(Index i = 0; i < cols(); ++i) + { + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) + return false; + for(Index j = 0; j < i; ++j) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) + return false; + } + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_DOT_H diff --git a/thirdparty/eigen/Eigen/src/Core/EigenBase.h b/thirdparty/eigen/Eigen/src/Core/EigenBase.h new file mode 100644 index 000000000..f76995af9 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/EigenBase.h @@ -0,0 +1,155 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGENBASE_H +#define EIGEN_EIGENBASE_H + +namespace Eigen { + +/** \class EigenBase + * + * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). + * + * In other words, an EigenBase object is an object that can be copied into a MatrixBase. + * + * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc. + * + * Notice that this class is trivial; it is only used to disambiguate overloaded functions. 
+ * + * \sa \blank \ref TopicClassHierarchy + */ +template struct EigenBase +{ +// typedef typename internal::plain_matrix_type::type PlainObject; + + /** \brief The interface type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead. + * \sa StorageIndex, \ref TopicPreprocessorDirectives. + */ + typedef Eigen::Index Index; + + // FIXME is it needed? + typedef typename internal::traits::StorageKind StorageKind; + + /** \returns a reference to the derived object */ + EIGEN_DEVICE_FUNC + Derived& derived() { return *static_cast(this); } + /** \returns a const reference to the derived object */ + EIGEN_DEVICE_FUNC + const Derived& derived() const { return *static_cast(this); } + + EIGEN_DEVICE_FUNC + inline Derived& const_cast_derived() const + { return *static_cast(const_cast(this)); } + EIGEN_DEVICE_FUNC + inline const Derived& const_derived() const + { return *static_cast(this); } + + /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + EIGEN_DEVICE_FUNC + inline Index rows() const { return derived().rows(); } + /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + EIGEN_DEVICE_FUNC + inline Index cols() const { return derived().cols(); } + /** \returns the number of coefficients, which is rows()*cols(). + * \sa rows(), cols(), SizeAtCompileTime. */ + EIGEN_DEVICE_FUNC + inline Index size() const { return rows() * cols(); } + + /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const + { derived().evalTo(dst); } + + /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ + template + EIGEN_DEVICE_FUNC + inline void addTo(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. 
+ typename Dest::PlainObject res(rows(),cols()); + evalTo(res); + dst += res; + } + + /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ + template + EIGEN_DEVICE_FUNC + inline void subTo(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(),cols()); + evalTo(res); + dst -= res; + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = dst * this->derived(); + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const + { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = this->derived() * dst; + } + +}; + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** \brief Copies the generic expression \a other into *this. + * + * \details The expression must provide a (templated) evalTo(Derived& dst) const + * function which does the actual job. In practice, this allows any user to write + * its own special matrix without having to modify MatrixBase + * + * \returns a reference to *this. 
+ */ +template +template +Derived& DenseBase::operator=(const EigenBase &other) +{ + call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +Derived& DenseBase::operator+=(const EigenBase &other) +{ + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +template +template +Derived& DenseBase::operator-=(const EigenBase &other) +{ + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_EIGENBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/ForceAlignedAccess.h b/thirdparty/eigen/Eigen/src/Core/ForceAlignedAccess.h new file mode 100644 index 000000000..7b08b45e6 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/ForceAlignedAccess.h @@ -0,0 +1,146 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FORCEALIGNEDACCESS_H +#define EIGEN_FORCEALIGNEDACCESS_H + +namespace Eigen { + +/** \class ForceAlignedAccess + * \ingroup Core_Module + * + * \brief Enforce aligned packet loads and stores regardless of what is requested + * + * \param ExpressionType the type of the object of which we are forcing aligned packet access + * + * This class is the return type of MatrixBase::forceAlignedAccess() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::forceAlignedAccess() + */ + +namespace internal { +template +struct traits > : public traits +{}; +} + +template class ForceAlignedAccess + : public internal::dense_xpr_base< ForceAlignedAccess >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) + + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const + { + return m_expression.coeff(row, col); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + + protected: + const 
ExpressionType& m_expression; + + private: + ForceAlignedAccess& operator=(const ForceAlignedAccess&); +}; + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(),class ForceAlignedAccess + */ +template +inline const ForceAlignedAccess +MatrixBase::forceAlignedAccess() const +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(), class ForceAlignedAccess + */ +template +inline ForceAlignedAccess +MatrixBase::forceAlignedAccess() +{ + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename internal::add_const_on_value_type,Derived&>::type>::type +MatrixBase::forceAlignedAccessIf() const +{ + return derived(); // FIXME This should not work but apparently is never used +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline typename internal::conditional,Derived&>::type +MatrixBase::forceAlignedAccessIf() +{ + return derived(); // FIXME This should not work but apparently is never used +} + +} // end namespace Eigen + +#endif // EIGEN_FORCEALIGNEDACCESS_H diff --git a/thirdparty/eigen/Eigen/src/Core/Fuzzy.h b/thirdparty/eigen/Eigen/src/Core/Fuzzy.h new file mode 100644 index 000000000..3e403a09d --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Fuzzy.h @@ -0,0 +1,155 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_FUZZY_H +#define EIGEN_FUZZY_H + +namespace Eigen { + +namespace internal +{ + +template::IsInteger> +struct isApprox_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) + { + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + } +}; + +template +struct isApprox_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) + { + return x.matrix() == y.matrix(); + } +}; + +template::IsInteger> +struct isMuchSmallerThan_object_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) + { + return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); + } +}; + +template +struct isMuchSmallerThan_object_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) + { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +template::IsInteger> +struct isMuchSmallerThan_scalar_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec) + { + return x.cwiseAbs2().sum() <= numext::abs2(prec * y); + } +}; + +template +struct isMuchSmallerThan_scalar_selector +{ + EIGEN_DEVICE_FUNC + static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&) + { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +} // end namespace internal + + +/** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. 
+ * + * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$ + * are considered to be approximately equal within precision \f$ p \f$ if + * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm + * L2 norm). + * + * \note Because of the multiplicativeness of this comparison, one can't use this function + * to check whether \c *this is approximately equal to the zero matrix or vector. + * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix + * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const + * RealScalar&, RealScalar) instead. + * + * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +bool DenseBase::isApprox( + const DenseBase& other, + const RealScalar& prec +) const +{ + return internal::isApprox_selector::run(derived(), other.derived(), prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f] + * + * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason, + * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm + * of a reference matrix of same dimensions. 
+ * + * \sa isApprox(), isMuchSmallerThan(const DenseBase&, RealScalar) const + */ +template +bool DenseBase::isMuchSmallerThan( + const typename NumTraits::Real& other, + const RealScalar& prec +) const +{ + return internal::isMuchSmallerThan_scalar_selector::run(derived(), other, prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm. + * + * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +bool DenseBase::isMuchSmallerThan( + const DenseBase& other, + const RealScalar& prec +) const +{ + return internal::isMuchSmallerThan_object_selector::run(derived(), other.derived(), prec); +} + +} // end namespace Eigen + +#endif // EIGEN_FUZZY_H diff --git a/thirdparty/eigen/Eigen/src/Core/GeneralProduct.h b/thirdparty/eigen/Eigen/src/Core/GeneralProduct.h new file mode 100644 index 000000000..0cc2d08e2 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/GeneralProduct.h @@ -0,0 +1,436 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_GENERAL_PRODUCT_H +#define EIGEN_GENERAL_PRODUCT_H + +namespace Eigen { + +enum { + Large = 2, + Small = 3 +}; + +namespace internal { + +template struct product_type_selector; + +template struct product_size_category +{ + enum { is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD, + value = is_large ? Large + : Size == 1 ? 1 + : Small + }; +}; + +template struct product_type +{ + typedef typename remove_all::type _Lhs; + typedef typename remove_all::type _Rhs; + enum { + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) + }; + + // the splitting into different lines of code here, introducing the _select enums and the typedef below, + // is to work around an internal compiler error with gcc 4.1 and 4.2. +private: + enum { + rows_select = product_size_category::value, + cols_select = product_size_category::value, + depth_select = product_size_category::value + }; + typedef product_type_selector selector; + +public: + enum { + value = selector::ret, + ret = selector::ret + }; +#ifdef EIGEN_DEBUG_PRODUCT + static void debug() + { + EIGEN_DEBUG_VAR(Rows); + EIGEN_DEBUG_VAR(Cols); + EIGEN_DEBUG_VAR(Depth); + EIGEN_DEBUG_VAR(rows_select); + EIGEN_DEBUG_VAR(cols_select); + EIGEN_DEBUG_VAR(depth_select); + EIGEN_DEBUG_VAR(value); + } +#endif +}; + +/* The following allows to select the kind of product at compile time + * based on the three dimensions of the product. + * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ +// FIXME I'm not sure the current mapping is the ideal one. 
+template struct product_type_selector { enum { ret = OuterProduct }; }; +template struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; }; +template struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; +template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Small,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Large,Small> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector<1, Large,Large> { enum { ret = GemvProduct }; }; +template<> struct product_type_selector<1, Small,Large> { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemvProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum 
{ ret = GemmProduct }; }; + +} // end namespace internal + +/*********************************************************************** +* Implementation of Inner Vector Vector Product +***********************************************************************/ + +// FIXME : maybe the "inner product" could return a Scalar +// instead of a 1x1 matrix ?? +// Pro: more natural for the user +// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix +// product ends up to a row-vector times col-vector product... To tackle this use +// case, we could have a specialization for Block with: operator=(Scalar x); + +/*********************************************************************** +* Implementation of Outer Vector Vector Product +***********************************************************************/ + +/*********************************************************************** +* Implementation of General Matrix Vector Product +***********************************************************************/ + +/* According to the shape/flags of the matrix we have to distinghish 3 different cases: + * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine + * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine + * 3 - all other cases are handled using a simple loop along the outer-storage direction. + * Therefore we need a lower level meta selector. + * Furthermore, if the matrix is the rhs, then the product has to be transposed. 
+ */ +namespace internal { + +template +struct gemv_dense_selector; + +} // end namespace internal + +namespace internal { + +template struct gemv_static_vector_if; + +template +struct gemv_static_vector_if +{ + EIGEN_STRONG_INLINE Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } +}; + +template +struct gemv_static_vector_if +{ + EIGEN_STRONG_INLINE Scalar* data() { return 0; } +}; + +template +struct gemv_static_vector_if +{ + enum { + ForceAlignment = internal::packet_traits::Vectorizable, + PacketSize = internal::packet_traits::size + }; + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { + return ForceAlignment + ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) + : m_data.array; + } + #endif +}; + +// The vector is on the left => transposition +template +struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; + gemv_dense_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_selector +{ + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + // make sure Dest is a compile-time vector type (bug 1166) + typedef typename conditional::type ActualDest; + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... + EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), + ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), + MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal + }; + + gemv_static_vector_if static_dest; + + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? 
dest.data() : static_dest.data()); + + if(!evalToDest) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); + + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } + } +}; + +template<> struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... 
+ DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 + }; + + gemv_static_vector_if static_rhs; + + ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), + DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); + + if(!DirectlyUseRhs) + { + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = actualRhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + Map(actualRhsPtr, actualRhs.size()) = actualRhs; + } + + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), + dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) + actualAlpha); + } +}; + +template<> struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); + const Index size = rhs.rows(); + for(Index k=0; k struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + typename nested_eval::type actual_rhs(rhs); + const Index rows = dest.rows(); + for(Index i=0; i +template +inline const Product +MatrixBase::operator*(const MatrixBase &other) const +{ + // A note regarding the function declaration: In MSVC, this function will sometimes + // not be inlined since DenseStorage is an unwindable object for dynamic + // matrices and product types are holding a member to store the result. + // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. 
+ enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) +#ifdef EIGEN_DEBUG_PRODUCT + internal::product_type::debug(); +#endif + + return Product(derived(), other.derived()); +} + +#endif // __CUDACC__ + +/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. + * + * The returned product will behave like any other expressions: the coefficients of the product will be + * computed once at a time as requested. This might be useful in some extremely rare cases when only + * a small and no coherent fraction of the result's coefficients have to be computed. + * + * \warning This version of the matrix product can be much much slower. So use it only if you know + * what you are doing and that you measured a true speed improvement. 
+ * + * \sa operator*(const MatrixBase&) + */ +template +template +const Product +MatrixBase::lazyProduct(const MatrixBase &other) const +{ + enum { + ProductIsValid = Derived::ColsAtCompileTime==Dynamic + || OtherDerived::RowsAtCompileTime==Dynamic + || int(Derived::ColsAtCompileTime)==int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived,OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT(ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) + + return Product(derived(), other.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/thirdparty/eigen/Eigen/src/Core/GenericPacketMath.h b/thirdparty/eigen/Eigen/src/Core/GenericPacketMath.h new file mode 100644 index 000000000..27033a2dd --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/GenericPacketMath.h @@ -0,0 +1,593 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_GENERIC_PACKET_MATH_H +#define EIGEN_GENERIC_PACKET_MATH_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \file GenericPacketMath.h + * + * Default implementation for types not supported by the vectorization. + * In practice these functions are provided to make easier the writing + * of generic vectorized code. + */ + +#ifndef EIGEN_DEBUG_ALIGNED_LOAD +#define EIGEN_DEBUG_ALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_LOAD +#define EIGEN_DEBUG_UNALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_ALIGNED_STORE +#define EIGEN_DEBUG_ALIGNED_STORE +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_STORE +#define EIGEN_DEBUG_UNALIGNED_STORE +#endif + +struct default_packet_traits +{ + enum { + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 1, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 1, + HasBlend = 0, + + HasDiv = 0, + HasSqrt = 0, + HasRsqrt = 0, + HasExp = 0, + HasLog = 0, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 0, + + HasSin = 0, + HasCos = 0, + HasTan = 0, + HasASin = 0, + HasACos = 0, + HasATan = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, + HasBetaInc = 0, + + HasRound = 0, + HasFloor = 0, + HasCeil = 0, + + HasSign = 0 + }; +}; + +template struct packet_traits : default_packet_traits +{ + typedef T type; + typedef T half; + enum { + Vectorizable = 0, + size = 1, + AlignedOnScalar = 0, + HasHalfPacket = 0 + }; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0 + }; +}; + +template struct packet_traits : packet_traits { }; + +template struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast(a) (coeff-wise) */ 
+template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast(a); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast(a); +} + +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { + return static_cast(a); +} + +/** \internal \returns a + b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +padd(const Packet& a, + const Packet& b) { return a+b; } + +/** \internal \returns a - b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +psub(const Packet& a, + const Packet& b) { return a-b; } + +/** \internal \returns -a (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pnegate(const Packet& a) { return -a; } + +/** \internal \returns conj(a) (coeff-wise) */ + +template EIGEN_DEVICE_FUNC inline Packet +pconj(const Packet& a) { return numext::conj(a); } + +/** \internal \returns a * b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmul(const Packet& a, + const Packet& b) { return a*b; } + +/** \internal \returns a / b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pdiv(const Packet& a, + const Packet& b) { return a/b; } + +/** \internal \returns the min of \a a and \a b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmin(const Packet& a, + const Packet& b) { return numext::mini(a, b); } + +/** \internal \returns the max of \a a and \a b (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmax(const Packet& a, + const Packet& b) { return numext::maxi(a, b); } + +/** \internal \returns the absolute value of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +pabs(const Packet& a) { using std::abs; return abs(a); } + +/** \internal \returns the phase angle of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +parg(const Packet& a) { using numext::arg; return arg(a); } + +/** \internal \returns the bitwise and of \a a 
and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pand(const Packet& a, const Packet& b) { return a & b; } + +/** \internal \returns the bitwise or of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +por(const Packet& a, const Packet& b) { return a | b; } + +/** \internal \returns the bitwise xor of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pxor(const Packet& a, const Packet& b) { return a ^ b; } + +/** \internal \returns the bitwise andnot of \a a and \a b */ +template EIGEN_DEVICE_FUNC inline Packet +pandnot(const Packet& a, const Packet& b) { return a & (!b); } + +/** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ +template EIGEN_DEVICE_FUNC inline Packet +pload(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet version of \a *from, (un-aligned load) */ +template EIGEN_DEVICE_FUNC inline Packet +ploadu(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template EIGEN_DEVICE_FUNC inline Packet +pset1(const typename unpacket_traits::type& a) { return a; } + +/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +template EIGEN_DEVICE_FUNC inline Packet +pload1(const typename unpacket_traits::type *a) { return pset1(*a); } + +/** \internal \returns a packet with elements of \a *from duplicated. + * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} + * Currently, this function is only used for scalar * complex products. + */ +template EIGEN_DEVICE_FUNC inline Packet +ploaddup(const typename unpacket_traits::type* from) { return *from; } + +/** \internal \returns a packet with elements of \a *from quadrupled. 
+ * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and + * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} + * Currently, this function is only used in matrix products. + * For packet-size smaller or equal to 4, this function is equivalent to pload1 + */ +template EIGEN_DEVICE_FUNC inline Packet +ploadquad(const typename unpacket_traits::type* from) +{ return pload1(from); } + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * a2 = pload1(a+2); + * a3 = pload1(a+3); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast2 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast4(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1, Packet& a2, Packet& a3) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); + a2 = pload1(a+2); + a3 = pload1(a+3); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast4 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast2(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); +} + +/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). 
*/ +template inline Packet +plset(const typename unpacket_traits::type& a) { return a; } + +/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ +template EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) +{ (*to) = from; } + +/** \internal copy the packet \a from to \a *to, (un-aligned store) */ +template EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + + template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) + { return ploadu(from); } + + template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) + { pstore(to, from); } + +/** \internal tries to do cache prefetching of \a addr */ +template EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) +{ +#ifdef __CUDA_ARCH__ +#if defined(__LP64__) + // 64-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); +#else + // 32-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); +#endif +#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) + __builtin_prefetch(addr); +#endif +} + +/** \internal \returns the first element of a packet */ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) +{ return a; } + +/** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ +template EIGEN_DEVICE_FUNC inline Packet +preduxp(const Packet* vecs) { return vecs[0]; } + +/** \internal \returns the sum of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) +{ return a; } + +/** \internal \returns the sum of the elements of \a a by block of 4 elements. 
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. + */ +template EIGEN_DEVICE_FUNC inline +typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +predux_downto4(const Packet& a) +{ return a; } + +/** \internal \returns the product of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) +{ return a; } + +/** \internal \returns the min of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) +{ return a; } + +/** \internal \returns the max of the elements of \a a*/ +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) +{ return a; } + +/** \internal \returns the reversed elements of \a a*/ +template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) +{ return a; } + +/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ +template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) +{ + // FIXME: uncomment the following in case we drop the internal imag and real functions. 
+// using std::imag; +// using std::real; + return Packet(imag(a),real(a)); +} + +/************************** +* Special math functions +***************************/ + +/** \internal \returns the sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psin(const Packet& a) { using std::sin; return sin(a); } + +/** \internal \returns the cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcos(const Packet& a) { using std::cos; return cos(a); } + +/** \internal \returns the tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptan(const Packet& a) { using std::tan; return tan(a); } + +/** \internal \returns the arc sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pasin(const Packet& a) { using std::asin; return asin(a); } + +/** \internal \returns the arc cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pacos(const Packet& a) { using std::acos; return acos(a); } + +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan(const Packet& a) { using std::atan; return atan(a); } + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } + +/** \internal \returns the exp of \a a (coeff-wise) */ +template 
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pexp(const Packet& a) { using std::exp; return exp(a); } + +/** \internal \returns the log of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog(const Packet& a) { using std::log; return log(a); } + +/** \internal \returns the log1p of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog1p(const Packet& a) { return numext::log1p(a); } + +/** \internal \returns the log10 of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog10(const Packet& a) { using std::log10; return log10(a); } + +/** \internal \returns the square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } + +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return pdiv(pset1(1), psqrt(a)); +} + +/** \internal \returns the rounded value of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pround(const Packet& a) { using numext::round; return round(a); } + +/** \internal \returns the floor of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } + +/** \internal \returns the ceil of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } + +/*************************************************************************** +* The following functions might not have to be overwritten for vectorized types +***************************************************************************/ + +/** \internal copy a packet with constant coeficient \a a (e.g., 
[a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */ +// NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) +template +inline void pstore1(typename unpacket_traits::type* to, const typename unpacket_traits::type& a) +{ + pstore(to, pset1(a)); +} + +/** \internal \returns a * b + c (coeff-wise) */ +template EIGEN_DEVICE_FUNC inline Packet +pmadd(const Packet& a, + const Packet& b, + const Packet& c) +{ return padd(pmul(a, b),c); } + +/** \internal \returns a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) +{ + if(Alignment >= unpacket_traits::alignment) + return pload(from); + else + return ploadu(from); +} + +/** \internal copy the packet \a from to \a *to. + * The pointer \a to must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) +{ + if(Alignment >= unpacket_traits::alignment) + pstore(to, from); + else + pstoreu(to, from); +} + +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +inline Packet ploadt_ro(const typename unpacket_traits::type* from) +{ + return ploadt(from); +} + +/** \internal default implementation of palign() allowing partial specialization */ +template +struct palign_impl +{ + // by default data are aligned, so there is nothing to be done :) + static inline void run(PacketType&, const PacketType&) {} +}; + +/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements + * of \a first and \a Offset first elements of \a second.
+ * + * This function is currently only used to optimize matrix-vector products on unaligned matrices. + * It takes 2 packets that represent a contiguous memory array, and returns a packet starting + * at the position \a Offset. For instance, for packets of 4 elements, we have: + * Input: + * - first = {f0,f1,f2,f3} + * - second = {s0,s1,s2,s3} + * Output: + * - if Offset==0 then {f0,f1,f2,f3} + * - if Offset==1 then {f1,f2,f3,s0} + * - if Offset==2 then {f2,f3,s0,s1} + * - if Offset==3 then {f3,s0,s1,s2} + */ +template +inline void palign(PacketType& first, const PacketType& second) +{ + palign_impl::run(first,second); +} + +/*************************************************************************** +* Fast complex products (GCC generates a function call which is very slow) +***************************************************************************/ + +// Eigen+CUDA does not support complexes. +#ifndef __CUDACC__ + +template<> inline std::complex pmul(const std::complex& a, const std::complex& b) +{ return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } + +template<> inline std::complex pmul(const std::complex& a, const std::complex& b) +{ return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } + +#endif + + +/*************************************************************************** + * PacketBlock, that is a collection of N packets where the number of words + * in the packet is a multiple of N. +***************************************************************************/ +template ::size> struct PacketBlock { + Packet packet[N]; +}; + +template EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& /*kernel*/) { + // Nothing to do in the scalar case, i.e. a 1x1 matrix. +} + +/*************************************************************************** + * Selector, i.e. vector of N boolean values used to select (i.e. blend) + * words from 2 packets.
+***************************************************************************/ +template struct Selector { + bool select[N]; +}; + +template EIGEN_DEVICE_FUNC inline Packet +pblend(const Selector::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { + return ifPacket.select[0] ? thenPacket : elsePacket; +} + +/** \internal \returns \a a with the first coefficient replaced by the scalar b */ +template EIGEN_DEVICE_FUNC inline Packet +pinsertfirst(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + mask.select[0] = true; + // This for loop should be optimized away by the compiler. + for(Index i=1; i::size; ++i) + mask.select[i] = false; + return pblend(mask, pset1(b), a); +} + +/** \internal \returns \a a with the last coefficient replaced by the scalar b */ +template EIGEN_DEVICE_FUNC inline Packet +pinsertlast(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + // This for loop should be optimized away by the compiler. + for(Index i=0; i::size-1; ++i) + mask.select[i] = false; + mask.select[unpacket_traits::size-1] = true; + return pblend(mask, pset1(b), a); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GENERIC_PACKET_MATH_H diff --git a/thirdparty/eigen/Eigen/src/Core/GlobalFunctions.h b/thirdparty/eigen/Eigen/src/Core/GlobalFunctions.h new file mode 100644 index 000000000..769dc255c --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/GlobalFunctions.h @@ -0,0 +1,187 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2016 Gael Guennebaud +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GLOBAL_FUNCTIONS_H +#define EIGEN_GLOBAL_FUNCTIONS_H + +#ifdef EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ + /** \returns an expression of the coefficient-wise DOC_OP of \a x + + DOC_DETAILS + + \sa Math functions, class CwiseUnaryOp + */ \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + NAME(const Eigen::ArrayBase& x); + +#else + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + (NAME)(const Eigen::ArrayBase& x) { \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ + } + +#endif // EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ + \ + template \ + struct NAME##_retval > \ + { \ + typedef const Eigen::CwiseUnaryOp, const Derived> type; \ + }; \ + template \ + struct NAME##_impl > \ + { \ + static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ + { \ + return typename NAME##_retval >::type(x.derived()); \ + } \ + }; + +namespace Eigen +{ + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa 
ArrayBase::asin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 
3),\sa Eigen::pow DOXCOMMA ArrayBase::cube) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) + + /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). 
+ * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Derived,Constant > + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return x.derived().pow(exponent); + } + + template + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow) + pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { + return x.derived().pow(exponent); + } +#endif + + /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power. + * + * Example: \include Cwise_array_power_array.cpp + * Output: \verbinclude Cwise_array_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) + { + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( + x.derived(), + exponents.derived() + ); + } + + /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power between a scalar and an array of exponents. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). 
+ * + * Example: \include Cwise_scalar_power_array.cpp + * Output: \verbinclude Cwise_scalar_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + pow(const Scalar& x,const Eigen::ArrayBase& x); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar), + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type + pow(const Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } + + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow) + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } +#endif + + + namespace internal + { + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag,scalar_imag_op) + EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2,scalar_abs2_op) + } +} + +// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, internal::isApprox...) + +#endif // EIGEN_GLOBAL_FUNCTIONS_H diff --git a/thirdparty/eigen/Eigen/src/Core/IO.h b/thirdparty/eigen/Eigen/src/Core/IO.h new file mode 100644 index 000000000..94e00f58b --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/IO.h @@ -0,0 +1,239 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_IO_H +#define EIGEN_IO_H + +namespace Eigen { + +enum { DontAlignCols = 1 }; +enum { StreamPrecision = -1, + FullPrecision = -2 }; + +namespace internal { +template +std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt); +} + +/** \class IOFormat + * \ingroup Core_Module + * + * \brief Stores a set of parameters controlling the way matrices are printed + * + * List of available parameters: + * - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c FullPrecision. + * The default is the special value \c StreamPrecision which means to use the + * stream's own precision setting, as set for instance using \c cout.precision(3). The other special value + * \c FullPrecision means that the number of digits will be computed to match the full precision of each floating-point + * type. + * - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c DontAlignCols which + * allows to disable the alignment of columns, resulting in faster code. 
+ * - \b coeffSeparator string printed between two coefficients of the same row + * - \b rowSeparator string printed between two rows + * - \b rowPrefix string printed at the beginning of each row + * - \b rowSuffix string printed at the end of each row + * - \b matPrefix string printed at the beginning of the matrix + * - \b matSuffix string printed at the end of the matrix + * + * Example: \include IOFormat.cpp + * Output: \verbinclude IOFormat.out + * + * \sa DenseBase::format(), class WithFormat + */ +struct IOFormat +{ + /** Default constructor, see class IOFormat for the meaning of the parameters */ + IOFormat(int _precision = StreamPrecision, int _flags = 0, + const std::string& _coeffSeparator = " ", + const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="", + const std::string& _matPrefix="", const std::string& _matSuffix="") + : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator), + rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags) + { + // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // don't add rowSpacer if columns are not to be aligned + if((flags & DontAlignCols)) + return; + int i = int(matSuffix.length())-1; + while (i>=0 && matSuffix[i]!='\n') + { + rowSpacer += ' '; + i--; + } + } + std::string matPrefix, matSuffix; + std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer; + std::string coeffSeparator; + int precision; + int flags; +}; + +/** \class WithFormat + * \ingroup Core_Module + * + * \brief Pseudo expression providing matrix output with given format + * + * \tparam ExpressionType the type of the object on which IO stream operations are performed + * + * This class represents an expression with stream operators controlled by a given IOFormat. + * It is the return type of DenseBase::format() + * and most of the time this is the only way it is used. 
+ * + * See class IOFormat for some examples. + * + * \sa DenseBase::format(), class IOFormat + */ +template +class WithFormat +{ + public: + + WithFormat(const ExpressionType& matrix, const IOFormat& format) + : m_matrix(matrix), m_format(format) + {} + + friend std::ostream & operator << (std::ostream & s, const WithFormat& wf) + { + return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format); + } + + protected: + const typename ExpressionType::Nested m_matrix; + IOFormat m_format; +}; + +/** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. + * + * \sa class IOFormat, class WithFormat + */ +template +inline const WithFormat +DenseBase::format(const IOFormat& fmt) const +{ + return WithFormat(derived(), fmt); +} + +namespace internal { + +// NOTE: This helper is kept for backward compatibility with previous code specializing +// this internal::significant_decimals_impl structure. In the future we should directly +// call digits10() which has been introduced in July 2016 in 3.3. 
+template +struct significant_decimals_impl +{ + static inline int run() + { + return NumTraits::digits10(); + } +}; + +/** \internal + * print the matrix \a _m to the output stream \a s using the output format \a fmt */ +template +std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& fmt) +{ + if(_m.size() == 0) + { + s << fmt.matPrefix << fmt.matSuffix; + return s; + } + + typename Derived::Nested m = _m; + typedef typename Derived::Scalar Scalar; + + Index width = 0; + + std::streamsize explicit_precision; + if(fmt.precision == StreamPrecision) + { + explicit_precision = 0; + } + else if(fmt.precision == FullPrecision) + { + if (NumTraits::IsInteger) + { + explicit_precision = 0; + } + else + { + explicit_precision = significant_decimals_impl::run(); + } + } + else + { + explicit_precision = fmt.precision; + } + + std::streamsize old_precision = 0; + if(explicit_precision) old_precision = s.precision(explicit_precision); + + bool align_cols = !(fmt.flags & DontAlignCols); + if(align_cols) + { + // compute the largest width + for(Index j = 0; j < m.cols(); ++j) + for(Index i = 0; i < m.rows(); ++i) + { + std::stringstream sstr; + sstr.copyfmt(s); + sstr << m.coeff(i,j); + width = std::max(width, Index(sstr.str().length())); + } + } + s << fmt.matPrefix; + for(Index i = 0; i < m.rows(); ++i) + { + if (i) + s << fmt.rowSpacer; + s << fmt.rowPrefix; + if(width) s.width(width); + s << m.coeff(i, 0); + for(Index j = 1; j < m.cols(); ++j) + { + s << fmt.coeffSeparator; + if (width) s.width(width); + s << m.coeff(i, j); + } + s << fmt.rowSuffix; + if( i < m.rows() - 1) + s << fmt.rowSeparator; + } + s << fmt.matSuffix; + if(explicit_precision) s.precision(old_precision); + return s; +} + +} // end namespace internal + +/** \relates DenseBase + * + * Outputs the matrix, to the given stream. + * + * If you wish to print the matrix with a format different than the default, use DenseBase::format(). 
+ * + * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers. + * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default parameters. + * + * \sa DenseBase::format() + */ +template +std::ostream & operator << +(std::ostream & s, + const DenseBase & m) +{ + return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT); +} + +} // end namespace Eigen + +#endif // EIGEN_IO_H diff --git a/thirdparty/eigen/Eigen/src/Core/Inverse.h b/thirdparty/eigen/Eigen/src/Core/Inverse.h new file mode 100644 index 000000000..f303aebf9 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Inverse.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +template class InverseImpl; + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. 
+ * + */ +template +class Inverse : public InverseImpl::StorageKind> +{ +public: + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::PlainObject PlainObject; + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; + typedef typename internal::remove_all::type NestedExpression; + + explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +// Generic API dispatcher +template +class InverseImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. + * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. 
+ * + * \sa class Inverse + */ +template +struct unary_evaluator > + : public evaluator::PlainObject> +{ + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/thirdparty/eigen/Eigen/src/Core/Map.h b/thirdparty/eigen/Eigen/src/Core/Map.h new file mode 100644 index 000000000..06d196702 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Map.h @@ -0,0 +1,164 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAP_H +#define EIGEN_MAP_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : public traits +{ + typedef traits TraitsBase; + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + Alignment = int(MapOptions)&int(AlignedMask), + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue::value ? 
int(Flags0) : (int(Flags0) & ~LvalueBit) + }; +private: + enum { Options }; // Expressions don't have Options +}; +} + +/** \class Map + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing array of data. + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout + * of an ordinary, contiguous array. This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class represents a matrix or vector expression mapping an existing array of data. + * It can be used to let Eigen interface without any overhead with non-Eigen data structures, + * such as plain C arrays or structures from other libraries. By default, it assumes that the + * data is laid out contiguously in memory. You can however override this by explicitly specifying + * inner and outer strides. + * + * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix: + * \include Map_simple.cpp + * Output: \verbinclude Map_simple.out + * + * If you need to map non-contiguous arrays, you can do so by specifying strides: + * + * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer + * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time + * fixed value. + * \include Map_inner_stride.cpp + * Output: \verbinclude Map_inner_stride.out + * + * Here's an example of mapping an array while specifying an outer stride. 
Here, since we're mapping + * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns. + * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is + * a short version of \c OuterStride because the default template parameter of OuterStride + * is \c Dynamic + * \include Map_outer_stride.cpp + * Output: \verbinclude Map_outer_stride.out + * + * For more details and for an example of specifying both an inner and an outer stride, see class Stride. + * + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ + * placement new syntax: + * + * Example: \include Map_placement_new.cpp + * Output: \verbinclude Map_placement_new.out + * + * This class is the return type of PlainObjectBase::Map() but can also be used directly. + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ +template class Map + : public MapBase > +{ + public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Map) + + typedef typename Base::PointerType PointerType; + typedef PointerType PointerArgType; + EIGEN_DEVICE_FUNC + inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } + + EIGEN_DEVICE_FUNC + inline Index innerStride() const + { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + EIGEN_DEVICE_FUNC + inline Index outerStride() const + { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? this->cols() + : this->rows(); + } + + /** Constructor in the fixed-size case. + * + * \param dataPtr pointer to the array to map + * \param stride optional Stride object, passing the strides. 
+ */ + EIGEN_DEVICE_FUNC + explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) + { + PlainObjectType::Base::_check_template_params(); + } + + /** Constructor in the dynamic-size vector case. + * + * \param dataPtr pointer to the array to map + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) + { + PlainObjectType::Base::_check_template_params(); + } + + /** Constructor in the dynamic-size matrix case. + * + * \param dataPtr pointer to the array to map + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) + { + PlainObjectType::Base::_check_template_params(); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) + + protected: + StrideType m_stride; +}; + + +} // end namespace Eigen + +#endif // EIGEN_MAP_H diff --git a/thirdparty/eigen/Eigen/src/Core/MapBase.h b/thirdparty/eigen/Eigen/src/Core/MapBase.h new file mode 100644 index 000000000..020f939ad --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/MapBase.h @@ -0,0 +1,299 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_MAPBASE_H +#define EIGEN_MAPBASE_H + +#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) + +namespace Eigen { + +/** \ingroup Core_Module + * + * \brief Base class for dense Map and Block expression with direct access + * + * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense + * Map and Block objects with direct access. + * Typical users do not have to directly deal with this class. + * + * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN. + * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details. + * + * The \c Derived class has to provide the following two methods describing the memory layout: + * \code Index innerStride() const; \endcode + * \code Index outerStride() const; \endcode + * + * \sa class Map, class Block + */ +template class MapBase + : public internal::dense_xpr_base::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + SizeAtCompileTime = Base::SizeAtCompileTime + }; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef typename internal::conditional< + bool(internal::is_lvalue::value), + Scalar *, + const Scalar *>::type + PointerType; + + using Base::derived; +// using Base::RowsAtCompileTime; +// using Base::ColsAtCompileTime; +// using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + 
using Base::Flags; + using Base::IsRowMajor; + + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::eval; + + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + + // bug 217 - compile error on ICC 11.1 + using Base::operator=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + + /** \copydoc DenseBase::rows() */ + EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } + /** \copydoc DenseBase::cols() */ + EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } + + /** Returns a pointer to the first coefficient of the matrix or vector. + * + * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride(). + * + * \sa innerStride(), outerStride() + */ + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } + + /** \copydoc PlainObjectBase::coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeff(Index rowId, Index colId) const + { + return m_data[colId * colStride() + rowId * rowStride()]; + } + + /** \copydoc PlainObjectBase::coeff(Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeff(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return m_data[index * innerStride()]; + } + + /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index rowId, Index colId) const + { + return this->m_data[colId * colStride() + rowId * rowStride()]; + } + + /** \copydoc PlainObjectBase::coeffRef(Index) const */ + EIGEN_DEVICE_FUNC + inline const Scalar& coeffRef(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + /** \internal */ + template + inline PacketScalar packet(Index rowId, Index colId) const + { + return internal::ploadt + (m_data + (colId * 
colStride() + rowId * rowStride())); + } + + /** \internal */ + template + inline PacketScalar packet(Index index) const + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return internal::ploadt(m_data + index * innerStride()); + } + + /** \internal Constructor for fixed size matrices or vectors */ + EIGEN_DEVICE_FUNC + explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) + { + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkSanity(); + } + + /** \internal Constructor for dynamically sized vectors */ + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index vecSize) + : m_data(dataPtr), + m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)), + m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime)) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + eigen_assert(vecSize >= 0); + eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); + checkSanity(); + } + + /** \internal Constructor for dynamically sized matrices */ + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index rows, Index cols) + : m_data(dataPtr), m_rows(rows), m_cols(cols) + { + eigen_assert( (dataPtr == 0) + || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkSanity(); + } + + #ifdef EIGEN_MAPBASE_PLUGIN + #include EIGEN_MAPBASE_PLUGIN + #endif + + protected: + + template + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const + { +#if EIGEN_MAX_ALIGN_BYTES>0 + eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits::Alignment) == 0) + || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); +#endif + } + + template + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if::Alignment==0,void*>::type 
= 0) const + {} + + PointerType m_data; + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; +}; + +/** \ingroup Core_Module + * + * \brief Base class for non-const dense Map and Block expression with direct access + * + * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of + * dense Map and Block objects with direct access. + * It inherits MapBase which defines the const variant for reading specific entries. + * + * \sa class Map, class Block + */ +template class MapBase + : public MapBase +{ + typedef MapBase ReadOnlyMapBase; + public: + + typedef MapBase Base; + + typedef typename Base::Scalar Scalar; + typedef typename Base::PacketScalar PacketScalar; + typedef typename Base::StorageIndex StorageIndex; + typedef typename Base::PointerType PointerType; + + using Base::derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::colStride; + + typedef typename internal::conditional< + internal::is_lvalue::value, + Scalar, + const Scalar + >::type ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) + { + return this->m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue& coeffRef(Index index) + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& val) + { + internal::pstoret + (this->m_data + (col * colStride() + row * rowStride()), val); + } + + template + 
inline void writePacket(Index index, const PacketScalar& val) + { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + internal::pstoret + (this->m_data + index * innerStride(), val); + } + + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + + EIGEN_DEVICE_FUNC + Derived& operator=(const MapBase& other) + { + ReadOnlyMapBase::Base::operator=(other); + return derived(); + } + + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. + using ReadOnlyMapBase::Base::operator=; +}; + +#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS + +} // end namespace Eigen + +#endif // EIGEN_MAPBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/MathFunctions.h b/thirdparty/eigen/Eigen/src/Core/MathFunctions.h new file mode 100644 index 000000000..7dfbc92d5 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/MathFunctions.h @@ -0,0 +1,1521 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONS_H +#define EIGEN_MATHFUNCTIONS_H + +// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html +// TODO this should better be moved to NumTraits +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + + +namespace Eigen { + +// On WINCE, std::abs is defined for int only, so let's defined our own overloads: +// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. 
+#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 +long abs(long x) { return (labs(x)); } +double abs(double x) { return (fabs(x)); } +float abs(float x) { return (fabsf(x)); } +long double abs(long double x) { return (fabsl(x)); } +#endif + +namespace internal { + +/** \internal \class global_math_functions_filtering_base + * + * What it does: + * Defines a typedef 'type' as follows: + * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then + * global_math_functions_filtering_base::type is a typedef for it. + * - otherwise, global_math_functions_filtering_base::type is a typedef for T. + * + * How it's used: + * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions. + * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know + * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase. + * So we must make sure to use sin_impl > and not sin_impl, otherwise our partial specialization + * won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells it. + * + * How it's implemented: + * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you replace + * the typename dummy by an integer template parameter, it doesn't work anymore! 
+ */ + +template +struct global_math_functions_filtering_base +{ + typedef T type; +}; + +template struct always_void { typedef void type; }; + +template +struct global_math_functions_filtering_base + ::type + > +{ + typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type; +}; + +#define EIGEN_MATHFUNC_IMPL(func, scalar) Eigen::internal::func##_impl::type> +#define EIGEN_MATHFUNC_RETVAL(func, scalar) typename Eigen::internal::func##_retval::type>::type + +/**************************************************************************** +* Implementation of real * +****************************************************************************/ + +template::IsComplex> +struct real_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return x; + } +}; + +template +struct real_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + using std::real; + return real(x); + } +}; + +template struct real_impl : real_default_impl {}; + +#ifdef __CUDA_ARCH__ +template +struct real_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.real(); + } +}; +#endif + +template +struct real_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of imag * +****************************************************************************/ + +template::IsComplex> +struct imag_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar&) + { + return RealScalar(0); + } +}; + +template +struct imag_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + using std::imag; + return imag(x); + } +}; + +template struct 
imag_impl : imag_default_impl {}; + +#ifdef __CUDA_ARCH__ +template +struct imag_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.imag(); + } +}; +#endif + +template +struct imag_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of real_ref * +****************************************************************************/ + +template +struct real_ref_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar& run(Scalar& x) + { + return reinterpret_cast(&x)[0]; + } + EIGEN_DEVICE_FUNC + static inline const RealScalar& run(const Scalar& x) + { + return reinterpret_cast(&x)[0]; + } +}; + +template +struct real_ref_retval +{ + typedef typename NumTraits::Real & type; +}; + +/**************************************************************************** +* Implementation of imag_ref * +****************************************************************************/ + +template +struct imag_ref_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar& run(Scalar& x) + { + return reinterpret_cast(&x)[1]; + } + EIGEN_DEVICE_FUNC + static inline const RealScalar& run(const Scalar& x) + { + return reinterpret_cast(&x)[1]; + } +}; + +template +struct imag_ref_default_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(Scalar&) + { + return Scalar(0); + } + EIGEN_DEVICE_FUNC + static inline const Scalar run(const Scalar&) + { + return Scalar(0); + } +}; + +template +struct imag_ref_impl : imag_ref_default_impl::IsComplex> {}; + +template +struct imag_ref_retval +{ + typedef typename NumTraits::Real & type; +}; + +/**************************************************************************** +* Implementation of conj * +****************************************************************************/ + +template::IsComplex> 
+struct conj_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + return x; + } +}; + +template +struct conj_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + using std::conj; + return conj(x); + } +}; + +template +struct conj_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of abs2 * +****************************************************************************/ + +template +struct abs2_impl_default +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return x*x; + } +}; + +template +struct abs2_impl_default // IsComplex +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return real(x)*real(x) + imag(x)*imag(x); + } +}; + +template +struct abs2_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return abs2_impl_default::IsComplex>::run(x); + } +}; + +template +struct abs2_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of norm1 * +****************************************************************************/ + +template +struct norm1_default_impl +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(abs); + return abs(real(x)) + abs(imag(x)); + } +}; + +template +struct norm1_default_impl +{ + EIGEN_DEVICE_FUNC + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(abs); + return abs(x); + } +}; + +template +struct norm1_impl : norm1_default_impl::IsComplex> {}; + +template +struct norm1_retval +{ + typedef typename NumTraits::Real type; +}; + 
+/**************************************************************************** +* Implementation of hypot * +****************************************************************************/ + +template +struct hypot_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD_MATH(sqrt); + RealScalar _x = abs(x); + RealScalar _y = abs(y); + Scalar p, qp; + if(_x>_y) + { + p = _x; + qp = _y / p; + } + else + { + p = _y; + qp = _x / p; + } + if(p==RealScalar(0)) return RealScalar(0); + return p * sqrt(RealScalar(1) + qp*qp); + } +}; + +template +struct hypot_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of cast * +****************************************************************************/ + +template +struct cast_impl +{ + EIGEN_DEVICE_FUNC + static inline NewType run(const OldType& x) + { + return static_cast(x); + } +}; + +// here, for once, we're plainly returning NewType: we don't want cast to do weird things. + +template +EIGEN_DEVICE_FUNC +inline NewType cast(const OldType& x) +{ + return cast_impl::run(x); +} + +/**************************************************************************** +* Implementation of round * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct round_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_USING_STD_MATH(round); + return round(x); + } + }; +#else + template + struct round_impl + { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD_MATH(ceil); + return (x > Scalar(0)) ? 
floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5)); + } + }; +#endif + +template +struct round_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of arg * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct arg_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; +#else + template::IsComplex> + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); } + }; + + template + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; + + template struct arg_impl : arg_default_impl {}; +#endif + +template +struct arg_retval +{ + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of log1p * +****************************************************************************/ + +namespace std_fallback { + // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar, + // or that there is no suitable std::log1p function available + template + EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits::Real RealScalar; + EIGEN_USING_STD_MATH(log); + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? 
x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + } +} + +template +struct log1p_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + #if EIGEN_HAS_CXX11_MATH + using std::log1p; + #endif + using std_fallback::log1p; + return log1p(x); + } +}; + + +template +struct log1p_retval +{ + typedef Scalar type; +}; + +/**************************************************************************** +* Implementation of pow * +****************************************************************************/ + +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl +{ + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) + { + EIGEN_USING_STD_MATH(pow); + return pow(x, y); + } +}; + +template +struct pow_impl +{ + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y) + { + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); + if(y & 1) res *= x; + y >>= 1; + while(y) + { + x *= x; + if(y&1) res *= x; + y >>= 1; + } + return res; + } +}; + +/**************************************************************************** +* Implementation of random * +****************************************************************************/ + +template +struct random_default_impl {}; + +template +struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +struct random_retval +{ + typedef Scalar type; +}; + +template inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y); +template inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(); + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return x + (y-x) * Scalar(std::rand()) / Scalar(RAND_MAX); + } + static inline Scalar run() + { + return run(Scalar(NumTraits::IsSigned ? 
-1 : 0), Scalar(1)); + } +}; + +enum { + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus +}; + +template struct meta_floor_log2_selector +{ + enum { middle = (lower + upper) / 2, + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) + }; +}; + +template::value> +struct meta_floor_log2 {}; + +template +struct meta_floor_log2 +{ + enum { value = meta_floor_log2::middle>::value }; +}; + +template +struct meta_floor_log2 +{ + enum { value = meta_floor_log2::middle, upper>::value }; +}; + +template +struct meta_floor_log2 +{ + enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; +}; + +template +struct meta_floor_log2 +{ + // no value, error at compile time +}; + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; + if(y=x the result converted to an unsigned long is still correct. + std::size_t range = ScalarX(y)-ScalarX(x); + std::size_t offset = 0; + // rejection sampling + std::size_t divisor = 1; + std::size_t multiplier = 1; + if(range range); + return Scalar(ScalarX(x) + offset); + } + + static inline Scalar run() + { +#ifdef EIGEN_MAKING_DOCS + return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); +#else + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, + scalar_bits = sizeof(Scalar) * CHAR_BIT, + shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), + offset = NumTraits::IsSigned ? 
(1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 + }; + return Scalar((std::rand() >> shift) - offset); +#endif + } +}; + +template +struct random_default_impl +{ + static inline Scalar run(const Scalar& x, const Scalar& y) + { + return Scalar(random(real(x), real(y)), + random(imag(x), imag(y))); + } + static inline Scalar run() + { + typedef typename NumTraits::Real RealScalar; + return Scalar(random(), random()); + } +}; + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() +{ + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +} + +// Implementatin of is* functions + +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T&) { return true; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isfinite_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isfinite)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isfinite; + return isfinite EIGEN_NOT_A_MACRO (x); + #else + return x<=NumTraits::highest() && x>=NumTraits::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isinf_impl(const T& 
x) +{ + #ifdef __CUDA_ARCH__ + return (::isinf)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isnan_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isnan)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); + #else + return x != x; + #endif +} + +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; } + +EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) +#else + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode: + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); } 
+template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + +// The following overload are defined at the end of this file +template EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x); +template EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); +template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); + +template T generic_fast_tanh_float(const T& a_x); + +} // end namespace internal + +/**************************************************************************** +* Generic math functions * +****************************************************************************/ + +namespace numext { + +#ifndef __CUDA_ARCH__ +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} +#else +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + return y < x ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) +{ + return fminf(x, y); +} +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + return x < y ? 
y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) +{ + return fmaxf(x, y); +} +#endif + + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) +{ + return internal::real_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) +{ + return internal::imag_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) +{ + return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); +} + +template 
+EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); } +EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log1p(const float &x) { return ::log1pf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log1p(const double &x) { return ::log1p(x); } +#endif + +template +EIGEN_DEVICE_FUNC +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) +{ + return internal::pow_impl::run(x, y); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); } +EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float isnan(float x) { return cl::sycl::isnan(x); } +EIGEN_ALWAYS_INLINE double isnan(double x) { return cl::sycl::isnan(x); } +EIGEN_ALWAYS_INLINE float isinf(float x) { return cl::sycl::isinf(x); } +EIGEN_ALWAYS_INLINE double isinf(double x) { return cl::sycl::isinf(x); } +EIGEN_ALWAYS_INLINE float isfinite(float x) { return cl::sycl::isfinite(x); } +EIGEN_ALWAYS_INLINE double isfinite(double x) { return cl::sycl::isfinite(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); +} + +#if 
defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); } +EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +template +EIGEN_DEVICE_FUNC +T (floor)(const T& x) +{ + EIGEN_USING_STD_MATH(floor); + return floor(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); } +EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float floor(const float &x) { return ::floorf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double floor(const double &x) { return ::floor(x); } +#endif + +template +EIGEN_DEVICE_FUNC +T (ceil)(const T& x) +{ + EIGEN_USING_STD_MATH(ceil); + return ceil(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); } +EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float ceil(const float &x) { return ::ceilf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double ceil(const double &x) { return ::ceil(x); } +#endif + + +/** Log base 2 for 32 bits positive integers. + * Conveniently returns 0 for x==0. */ +inline int log2(int x) +{ + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; +} + +/** \returns the square root of \a x. 
+ * + * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * It's usage is justified in performance critical functions, like norm/normalize. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sqrt(const T &x) +{ + EIGEN_USING_STD_MATH(sqrt); + return sqrt(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); } +EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T log(const T &x) { + EIGEN_USING_STD_MATH(log); + return log(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); } +EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log(const float &x) { return ::logf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log(const double &x) { return ::log(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename NumTraits::Real abs(const T &x) { + EIGEN_USING_STD_MATH(abs); + return abs(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } +EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const float &x) { return ::fabsf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const double &x) { return ::fabs(x); } + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const std::complex& x) { + return ::hypotf(x.real(), x.imag()); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE 
+double abs(const std::complex& x) { + return ::hypot(x.real(), x.imag()); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T exp(const T &x) { + EIGEN_USING_STD_MATH(exp); + return exp(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); } +EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float exp(const float &x) { return ::expf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double exp(const double &x) { return ::exp(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T cos(const T &x) { + EIGEN_USING_STD_MATH(cos); + return cos(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); } +EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float cos(const float &x) { return ::cosf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double cos(const double &x) { return ::cos(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sin(const T &x) { + EIGEN_USING_STD_MATH(sin); + return sin(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); } +EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sin(const float &x) { return ::sinf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sin(const double &x) { return ::sin(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tan(const T &x) { + EIGEN_USING_STD_MATH(tan); + return tan(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float tan(float x) { return 
cl::sycl::tan(x); } +EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tan(const float &x) { return ::tanf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tan(const double &x) { return ::tan(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T acos(const T &x) { + EIGEN_USING_STD_MATH(acos); + return acos(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); } +EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float acos(const float &x) { return ::acosf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double acos(const double &x) { return ::acos(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T asin(const T &x) { + EIGEN_USING_STD_MATH(asin); + return asin(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); } +EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float asin(const float &x) { return ::asinf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double asin(const double &x) { return ::asin(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T atan(const T &x) { + EIGEN_USING_STD_MATH(atan); + return atan(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); } +EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float atan(const float &x) { return ::atanf(x); } + 
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double atan(const double &x) { return ::atan(x); } +#endif + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T cosh(const T &x) { + EIGEN_USING_STD_MATH(cosh); + return cosh(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); } +EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float cosh(const float &x) { return ::coshf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double cosh(const double &x) { return ::cosh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sinh(const T &x) { + EIGEN_USING_STD_MATH(sinh); + return sinh(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); } +EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sinh(const float &x) { return ::sinhf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sinh(const double &x) { return ::sinh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tanh(const T &x) { + EIGEN_USING_STD_MATH(tanh); + return tanh(x); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); } +EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); } +#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(float x) { return internal::generic_fast_tanh_float(x); } +#endif + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(const float &x) { return ::tanhf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tanh(const double &x) { return ::tanh(x); } +#endif + +template 
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T fmod(const T& a, const T& b) { + EIGEN_USING_STD_MATH(fmod); + return fmod(a, b); +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); } +EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float fmod(const float& a, const float& b) { + return ::fmodf(a, b); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double fmod(const double& a, const double& b) { + return ::fmod(a, b); +} +#endif + +} // end namespace numext + +namespace internal { + +template +EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + +/**************************************************************************** +* Implementation of fuzzy comparisons * +****************************************************************************/ + +template +struct scalar_fuzzy_default_impl {}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) + { + return numext::abs(x) <= numext::abs(y) * prec; + } + EIGEN_DEVICE_FUNC + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec; + } + EIGEN_DEVICE_FUNC + static inline bool 
isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return x <= y || isApprox(x, y, prec); + } +}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) + { + return x == Scalar(0); + } + EIGEN_DEVICE_FUNC + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) + { + return x == y; + } + EIGEN_DEVICE_FUNC + static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) + { + return x <= y; + } +}; + +template +struct scalar_fuzzy_default_impl +{ + typedef typename NumTraits::Real RealScalar; + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) + { + return numext::abs2(x) <= numext::abs2(y) * prec * prec; + } + EIGEN_DEVICE_FUNC + static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) + { + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; + } +}; + +template +struct scalar_fuzzy_impl : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template EIGEN_DEVICE_FUNC +inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); +} + +template EIGEN_DEVICE_FUNC +inline bool isApprox(const Scalar& x, const Scalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::isApprox(x, y, precision); +} + +template EIGEN_DEVICE_FUNC +inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, + const typename NumTraits::Real &precision = NumTraits::dummy_precision()) +{ + return scalar_fuzzy_impl::isApproxOrLessThan(x, y, precision); +} + 
+/****************************************** +*** The special case of the bool type *** +******************************************/ + +template<> struct random_impl +{ + static inline bool run() + { + return random(0,1)==0 ? false : true; + } +}; + +template<> struct scalar_fuzzy_impl +{ + typedef bool RealScalar; + + template EIGEN_DEVICE_FUNC + static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) + { + return !x; + } + + EIGEN_DEVICE_FUNC + static inline bool isApprox(bool x, bool y, bool) + { + return x == y; + } + + EIGEN_DEVICE_FUNC + static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) + { + return (!x) || y; + } + +}; + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONS_H diff --git a/thirdparty/eigen/Eigen/src/Core/MathFunctionsImpl.h b/thirdparty/eigen/Eigen/src/Core/MathFunctionsImpl.h new file mode 100644 index 000000000..3c9ef22fa --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -0,0 +1,78 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONSIMPL_H +#define EIGEN_MATHFUNCTIONSIMPL_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulp in the range [-9, 9], outside of which + the tanh(x) = +/-1. + + This implementation works on both scalars and packets. 
+*/ +template +T generic_fast_tanh_float(const T& a_x) +{ + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_9 = pset1(9.f); + const T minus_9 = pset1(-9.f); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(4.89352455891786e-03f); + const T alpha_3 = pset1(6.37261928875436e-04f); + const T alpha_5 = pset1(1.48572235717979e-05f); + const T alpha_7 = pset1(5.12229709037114e-08f); + const T alpha_9 = pset1(-8.60467152213735e-11f); + const T alpha_11 = pset1(2.00018790482477e-13f); + const T alpha_13 = pset1(-2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(4.89352518554385e-03f); + const T beta_2 = pset1(2.26843463243900e-03f); + const T beta_4 = pset1(1.18534705686654e-04f); + const T beta_6 = pset1(1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. 
+ return pdiv(p, q); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONSIMPL_H diff --git a/thirdparty/eigen/Eigen/src/Core/Matrix.h b/thirdparty/eigen/Eigen/src/Core/Matrix.h new file mode 100644 index 000000000..90c336d8c --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Matrix.h @@ -0,0 +1,461 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_H +#define EIGEN_MATRIX_H + +namespace Eigen { + +namespace internal { +template +struct traits > +{ +private: + enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; + typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = unpacket_traits::alignment, + packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? 
PacketAccessBit : 0 + }; + +public: + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _MaxRows, + MaxColsAtCompileTime = _MaxCols, + Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, + Options = _Options, + InnerStrideAtCompileTime = 1, + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment + }; +}; +} + +/** \class Matrix + * \ingroup Core_Module + * + * \brief The matrix class, also used for vectors and row-vectors + * + * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen. + * Vectors are matrices with one column, and row-vectors are matrices with one row. + * + * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). + * + * The first three template parameters are required: + * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required + * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. 
+ * \tparam _MaxRows Maximum number of rows. Defaults to \a _Rows (\ref maxrows "note"). + * \tparam _MaxCols Maximum number of columns. Defaults to \a _Cols (\ref maxrows "note"). + * + * Eigen provides a number of typedefs covering the usual cases. Here are some examples: + * + * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix) + * \li \c Vector4f is a vector of 4 floats (\c Matrix) + * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix) + * + * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix) + * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix) + * + * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix) + * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix) + * + * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs. + * + * You can access elements of vectors and matrices using normal subscripting: + * + * \code + * Eigen::VectorXd v(10); + * v[0] = 0.1; + * v[1] = 0.2; + * v(0) = 0.3; + * v(1) = 0.4; + * + * Eigen::MatrixXi m(10, 10); + * m(0, 1) = 1; + * m(0, 2) = 2; + * m(0, 3) = 3; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. + * + * Some notes: + * + *

+ *
\anchor dense Dense versus sparse:
+ *
This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the Sparse module. + * + * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary contiguous array. + * This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero coefficients.
+ * + *
\anchor fixedsize Fixed-size versus dynamic-size:
+ *
Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates the array + * of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, typically up to 4x4, sometimes up + * to 16x16. Larger matrices should be declared as dynamic-size even if one happens to know their size at compile-time. + * + * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they are runtime + * variables, and the array of coefficients is allocated dynamically on the heap. + * + * Note that \em dense matrices, be they Fixed-size or Dynamic-size, do not expand dynamically in the sense of a std::map. + * If you want this behavior, see the Sparse module.
+ * + *
\anchor maxrows _MaxRows and _MaxCols:
+ *
In most cases, one just leaves these parameters to the default values. + * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases + * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they cannot + * exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case _MaxRows and _MaxCols + * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic.
+ *
+ * + * ABI and storage layout + * + * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3. + * + * + * + * + * + * + *
Matrix typeEquivalent C structure
\code Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index rows, cols; + * }; + * \endcode
\code + * Matrix + * Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index size; + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0 + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0 + * Eigen::Index rows, cols; + * }; + * \endcode
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two + * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. + * + * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, + * \ref TopicStorageOrders + */ + +template +class Matrix + : public PlainObjectBase > +{ + public: + + /** \brief Base class typedef. + * \sa PlainObjectBase + */ + typedef PlainObjectBase Base; + + enum { Options = _Options }; + + EIGEN_DENSE_PUBLIC_INTERFACE(Matrix) + + typedef typename Base::PlainObject PlainObject; + + using Base::base; + using Base::coeffRef; + + /** + * \brief Assigns matrices to each other. + * + * \note This is a special case of the templated operator=. Its purpose is + * to prevent a default operator= from hiding the templated operator=. + * + * \callgraph + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) + { + return Base::_set(other); + } + + /** \internal + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase& other) + { + return Base::_set(other); + } + + /* Here, doxygen failed to copy the brief information when using \copydoc */ + + /** + * \brief Copies the generic expression \a other into *this. 
+ * \copydetails DenseBase::operator=(const EigenBase &other) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase &other) + { + return Base::operator=(other); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) + { + return Base::operator=(func); + } + + /** \brief Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix() : Base() + { + Base::_check_template_params(); + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + // FIXME is it still needed + EIGEN_DEVICE_FUNC + explicit Matrix(internal::constructor_without_unaligned_array_assert) + : Base(internal::constructor_without_unaligned_array_assert()) + { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) + : Base(std::move(other)) + { + Base::_check_template_params(); + if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) + Base::_set_noalias(other); + } + EIGEN_DEVICE_FUNC + Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) + { + other.swap(*this); + return *this; + } +#endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + + // This constructor is for both 1x1 matrices and dynamic vectors + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Matrix(const T& x) + { + Base::_check_template_params(); + Base::template _init1(x); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) + { + Base::_check_template_params(); + 
Base::template _init2(x, y); + } + #else + /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC + explicit Matrix(const Scalar *data); + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim); + /** \brief Constructs an initialized 1x1 matrix with the given coefficient */ + Matrix(const Scalar& x); + /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size matrices. For fixed-size matrices, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). 
+ */ + EIGEN_DEVICE_FUNC + Matrix(Index rows, Index cols); + + /** \brief Constructs an initialized 2D vector with given coefficients */ + Matrix(const Scalar& x, const Scalar& y); + #endif + + /** \brief Constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + } + /** \brief Constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) + { + Base::_check_template_params(); + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + m_storage.data()[3] = w; + } + + + /** \brief Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) + { } + + /** \brief Copy constructor for generic expressions. 
+ * \sa MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const EigenBase &other) + : Base(other.derived()) + { } + + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } + + /////////// Geometry module /////////// + + template + EIGEN_DEVICE_FUNC + explicit Matrix(const RotationBase& r); + template + EIGEN_DEVICE_FUNC + Matrix& operator=(const RotationBase& r); + + // allow to extend Matrix outside Eigen + #ifdef EIGEN_MATRIX_PLUGIN + #include EIGEN_MATRIX_PLUGIN + #endif + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; +}; + +/** \defgroup matrixtypedefs Global matrix typedefs + * + * \ingroup Core_Module + * + * Eigen defines several typedef shortcuts for most common matrix and vector types. + * + * The general patterns are the following: + * + * \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of floats. + * + * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is + * a fixed-size vector of 4 complex floats. 
+ * + * \sa class Matrix + */ + +#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##SizeSuffix##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Vector##SizeSuffix##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##Size##X##TypeSuffix; \ +/** \ingroup matrixtypedefs */ \ +typedef Matrix Matrix##X##Size##TypeSuffix; + +#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \ +EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ +EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_TYPEDEFS +#undef EIGEN_MAKE_FIXED_TYPEDEFS + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git a/thirdparty/eigen/Eigen/src/Core/MatrixBase.h b/thirdparty/eigen/Eigen/src/Core/MatrixBase.h new file mode 100644 index 000000000..f7cf04cde --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/MatrixBase.h @@ -0,0 +1,530 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_MATRIXBASE_H +#define EIGEN_MATRIXBASE_H + +namespace Eigen { + +/** \class MatrixBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and expressions + * + * This class is the base that is inherited by all matrix, vector, and related expression + * types. Most of the Eigen API is contained in this class, and its base classes. Other important + * classes for the Eigen API are Matrix, and VectorwiseOp. + * + * Note that some methods are defined in other modules such as the \ref LU_Module LU module + * for all functions related to matrix inversions. + * + * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc. + * + * When writing a function taking Eigen objects as argument, if you want your function + * to take as argument any matrix, vector, or expression, just let it take a + * MatrixBase argument. As an example, here is a function printFirstRow which, given + * a matrix, vector, or expression \a x, prints the first row of \a x. + * + * \code + template + void printFirstRow(const Eigen::MatrixBase& x) + { + cout << x.row(0) << endl; + } + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. 
+ * + * \sa \blank \ref TopicClassHierarchy + */ +template class MatrixBase + : public DenseBase +{ + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef MatrixBase StorageBaseType; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + using Base::derived; + using Base::const_cast_derived; + using Base::rows; + using Base::cols; + using Base::size; + using Base::coeff; + using Base::coeffRef; + using Base::lazyAssign; + using Base::eval; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; + typedef typename Base::RowXpr RowXpr; + typedef typename Base::ColXpr ColXpr; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** type of the equivalent square matrix */ + typedef Matrix SquareMatrixType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** \returns the size of the main diagonal, which is min(rows(),cols()). + * \sa rows(), cols(), SizeAtCompileTime. 
*/ + EIGEN_DEVICE_FUNC + inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); } + + typedef typename Base::PlainObject PlainObject; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; + /** \internal the return type of MatrixBase::adjoint() */ + typedef typename internal::conditional::IsComplex, + CwiseUnaryOp, ConstTransposeReturnType>, + ConstTransposeReturnType + >::type AdjointReturnType; + /** \internal Return type of eigenvalues() */ + typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; + /** \internal the return type of identity */ + typedef CwiseNullaryOp,PlainObject> IdentityReturnType; + /** \internal the return type of unit vectors */ + typedef Block, SquareMatrixType>, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime> BasisReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" +# include "../plugins/CommonCwiseBinaryOps.h" +# include "../plugins/MatrixCwiseUnaryOps.h" +# include "../plugins/MatrixCwiseBinaryOps.h" +# ifdef EIGEN_MATRIXBASE_PLUGIN +# include EIGEN_MATRIXBASE_PLUGIN +# endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const MatrixBase& other); + + // We cannot inherit here via Base::operator= since it is causing + // trouble with MSVC. 
+ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const DenseBase& other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC + Derived& operator=(const ReturnByValue& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const MatrixBase& other); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const MatrixBase& other); + +#ifdef __CUDACC__ + template + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &other) const + { return this->lazyProduct(other); } +#else + + template + const Product + operator*(const MatrixBase &other) const; + +#endif + + template + EIGEN_DEVICE_FUNC + const Product + lazyProduct(const MatrixBase &other) const; + + template + Derived& operator*=(const EigenBase& other); + + template + void applyOnTheLeft(const EigenBase& other); + + template + void applyOnTheRight(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC + const Product + operator*(const DiagonalBase &diagonal) const; + + template + EIGEN_DEVICE_FUNC + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType + dot(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC RealScalar norm() const; + RealScalar stableNorm() const; + RealScalar blueNorm() const; + RealScalar hypotNorm() const; + EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const; + EIGEN_DEVICE_FUNC void normalize(); + EIGEN_DEVICE_FUNC void stableNormalize(); + + EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC void adjointInPlace(); + + typedef Diagonal DiagonalReturnType; + EIGEN_DEVICE_FUNC + DiagonalReturnType diagonal(); + + typedef typename internal::add_const >::type ConstDiagonalReturnType; + EIGEN_DEVICE_FUNC + ConstDiagonalReturnType diagonal() const; + + template struct 
DiagonalIndexReturnType { typedef Diagonal Type; }; + template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; + + template + EIGEN_DEVICE_FUNC + typename DiagonalIndexReturnType::Type diagonal(); + + template + EIGEN_DEVICE_FUNC + typename ConstDiagonalIndexReturnType::Type diagonal() const; + + typedef Diagonal DiagonalDynamicIndexReturnType; + typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + + EIGEN_DEVICE_FUNC + DiagonalDynamicIndexReturnType diagonal(Index index); + EIGEN_DEVICE_FUNC + ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; + + template struct TriangularViewReturnType { typedef TriangularView Type; }; + template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; + + template + EIGEN_DEVICE_FUNC + typename TriangularViewReturnType::Type triangularView(); + template + EIGEN_DEVICE_FUNC + typename ConstTriangularViewReturnType::Type triangularView() const; + + template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; + template struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView Type; }; + + template + EIGEN_DEVICE_FUNC + typename SelfAdjointViewReturnType::Type selfadjointView(); + template + EIGEN_DEVICE_FUNC + typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + + const SparseView sparseView(const Scalar& m_reference = Scalar(0), + const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(); + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitX(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitY(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); + EIGEN_DEVICE_FUNC static 
const BasisReturnType UnitW(); + + EIGEN_DEVICE_FUNC + const DiagonalWrapper asDiagonal() const; + const PermutationWrapper asPermutation() const; + + EIGEN_DEVICE_FUNC + Derived& setIdentity(); + EIGEN_DEVICE_FUNC + Derived& setIdentity(Index rows, Index cols); + + bool isIdentity(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isDiagonal(const RealScalar& prec = NumTraits::dummy_precision()) const; + + bool isUpperTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isLowerTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + + template + bool isOrthogonal(const MatrixBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isUnitary(const RealScalar& prec = NumTraits::dummy_precision()) const; + + /** \returns true if each coefficients of \c *this and \a other are all exactly equal. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator!= */ + template + inline bool operator==(const MatrixBase& other) const + { return cwiseEqual(other).all(); } + + /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator== */ + template + inline bool operator!=(const MatrixBase& other) const + { return cwiseNotEqual(other).any(); } + + NoAlias noalias(); + + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. 
+ inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template inline const Derived& forceAlignedAccessIf() const { return derived(); } + template inline Derived& forceAlignedAccessIf() { return derived(); } + + EIGEN_DEVICE_FUNC Scalar trace() const; + + template EIGEN_DEVICE_FUNC RealScalar lpNorm() const; + + EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } + + /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } + +/////////// LU module /////////// + + inline const FullPivLU fullPivLu() const; + inline const PartialPivLU partialPivLu() const; + + inline const PartialPivLU lu() const; + + inline const Inverse inverse() const; + + template + inline void computeInverseAndDetWithCheck( + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() + ) const; + template + inline void computeInverseWithCheck( + ResultType& inverse, + bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() + ) const; + Scalar determinant() const; + +/////////// Cholesky module /////////// + + inline const LLT llt() const; + inline const LDLT ldlt() const; + +/////////// QR module /////////// + + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; + inline const CompleteOrthogonalDecomposition 
completeOrthogonalDecomposition() const; + +/////////// Eigenvalues module /////////// + + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; + +/////////// SVD module /////////// + + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; + +/////////// Geometry module /////////// + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /// \internal helper struct to form the return type of the cross product + template struct cross_product_return_type { + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef Matrix type; + }; + #endif // EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC +#ifndef EIGEN_PARSED_BY_DOXYGEN + inline typename cross_product_return_type::type +#else + inline PlainObject +#endif + cross(const MatrixBase& other) const; + + template + EIGEN_DEVICE_FUNC + inline PlainObject cross3(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC + inline PlainObject unitOrthogonal(void) const; + + EIGEN_DEVICE_FUNC + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + + // put this as separate enum value to work around possible GCC 4.3 bug (?) + enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) + : ColsAtCompileTime==1 ? Vertical : Horizontal }; + typedef Homogeneous HomogeneousReturnType; + EIGEN_DEVICE_FUNC + inline HomogeneousReturnType homogeneous() const; + + enum { + SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1 + }; + typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, + internal::traits::ColsAtCompileTime==1 ? 
1 : SizeMinusOne> ConstStartMinusOne; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; + EIGEN_DEVICE_FUNC + inline const HNormalizedReturnType hnormalized() const; + +////////// Householder module /////////// + + void makeHouseholderInPlace(Scalar& tau, RealScalar& beta); + template + void makeHouseholder(EssentialPart& essential, + Scalar& tau, RealScalar& beta) const; + template + void applyHouseholderOnTheLeft(const EssentialPart& essential, + const Scalar& tau, + Scalar* workspace); + template + void applyHouseholderOnTheRight(const EssentialPart& essential, + const Scalar& tau, + Scalar* workspace); + +///////// Jacobi module ///////// + + template + void applyOnTheLeft(Index p, Index q, const JacobiRotation& j); + template + void applyOnTheRight(Index p, Index q, const JacobiRotation& j); + +///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase &other) const + { + return other.cwiseProduct(derived()); + } + +///////// MatrixFunctions module ///////// + + typedef typename internal::stem_function::type StemFunction; + const MatrixExponentialReturnValue exp() const; + const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; + const MatrixFunctionReturnValue cosh() const; + const MatrixFunctionReturnValue sinh() const; + const MatrixFunctionReturnValue cos() const; + const MatrixFunctionReturnValue sin() const; + const MatrixSquareRootReturnValue sqrt() const; + const MatrixLogarithmReturnValue log() const; + const MatrixPowerReturnValue pow(const RealScalar& p) const; + const MatrixComplexPowerReturnValue pow(const std::complex& p) const; + + protected: + EIGEN_DEVICE_FUNC MatrixBase() : Base() {} + + private: + EIGEN_DEVICE_FUNC explicit MatrixBase(int); + EIGEN_DEVICE_FUNC MatrixBase(int,int); + template EIGEN_DEVICE_FUNC explicit MatrixBase(const 
MatrixBase&); + protected: + // mixing arrays and matrices is not legal + template Derived& operator+=(const ArrayBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} + // mixing arrays and matrices is not legal + template Derived& operator-=(const ArrayBase& ) + {EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;} +}; + + +/*************************************************************************** +* Implementation of matrix base methods +***************************************************************************/ + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline Derived& +MatrixBase::operator*=(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline void MatrixBase::applyOnTheRight(const EigenBase &other) +{ + other.derived().applyThisOnTheRight(derived()); +} + +/** replaces \c *this by \a other * \c *this. 
+ * + * Example: \include MatrixBase_applyOnTheLeft.cpp + * Output: \verbinclude MatrixBase_applyOnTheLeft.out + */ +template +template +inline void MatrixBase::applyOnTheLeft(const EigenBase &other) +{ + other.derived().applyThisOnTheLeft(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIXBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/NestByValue.h b/thirdparty/eigen/Eigen/src/Core/NestByValue.h new file mode 100644 index 000000000..13adf070e --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/NestByValue.h @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NESTBYVALUE_H +#define EIGEN_NESTBYVALUE_H + +namespace Eigen { + +namespace internal { +template +struct traits > : public traits +{}; +} + +/** \class NestByValue + * \ingroup Core_Module + * + * \brief Expression which must be nested by value + * + * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value + * + * This class is the return type of MatrixBase::nestByValue() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::nestByValue() + */ +template class NestByValue + : public internal::dense_xpr_base< NestByValue >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) + + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const + { + return m_expression.coeff(row, col); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) + { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const + { + return m_expression.coeff(index); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) + { + return m_expression.const_cast_derived().coeffRef(index); + } + + template + inline const PacketScalar packet(Index row, Index col) const + { + return m_expression.template packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const + { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) + { + m_expression.const_cast_derived().template writePacket(index, x); + } + + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType m_expression; +}; + +/** \returns an expression of the temporary version of *this. 
+ */ +template +inline const NestByValue +DenseBase::nestByValue() const +{ + return NestByValue(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_NESTBYVALUE_H diff --git a/thirdparty/eigen/Eigen/src/Core/NoAlias.h b/thirdparty/eigen/Eigen/src/Core/NoAlias.h new file mode 100644 index 000000000..33908010b --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/NoAlias.h @@ -0,0 +1,108 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NOALIAS_H +#define EIGEN_NOALIAS_H + +namespace Eigen { + +/** \class NoAlias + * \ingroup Core_Module + * + * \brief Pseudo expression providing an operator = assuming no aliasing + * + * \tparam ExpressionType the type of the object on which to do the lazy assignment + * + * This class represents an expression with special assignment operators + * assuming no aliasing between the target expression and the source expression. + * More precisely it allows bypassing the EvalBeforeAssignBit flag of the source expression. + * It is the return type of MatrixBase::noalias() + * and most of the time this is the only way it is used.
+ * + * \sa MatrixBase::noalias() + */ +template class StorageBase> +class NoAlias +{ + public: + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + return m_expression; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) + { + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); + return m_expression; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) + { + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); + return m_expression; + } + + EIGEN_DEVICE_FUNC + ExpressionType& expression() const + { + return m_expression; + } + + protected: + ExpressionType& m_expression; +}; + +/** \returns a pseudo expression of \c *this with an operator= assuming + * no aliasing between \c *this and the source expression. + * + * More precisely, noalias() allows bypassing the EvalBeforeAssignBit flag. + * Currently, even though several expressions may alias, only product + * expressions have this flag. Therefore, noalias() is only useful when + * the source expression contains a matrix product. + * + * Here are some examples where noalias is useful: + * \code + * D.noalias() = A * B; + * D.noalias() += A.transpose() * B; + * D.noalias() -= 2 * A * B.adjoint(); + * \endcode + * + * On the other hand the following example will lead to a \b wrong result: + * \code + * A.noalias() = A * B; + * \endcode + * because the result matrix A is also an operand of the matrix product.
Therefore, + * there is no alternative than evaluating A * B in a temporary, that is the default + * behavior when you write: + * \code + * A = A * B; + * \endcode + * + * \sa class NoAlias + */ +template +NoAlias MatrixBase::noalias() +{ + return NoAlias(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_NOALIAS_H diff --git a/thirdparty/eigen/Eigen/src/Core/NumTraits.h b/thirdparty/eigen/Eigen/src/Core/NumTraits.h new file mode 100644 index 000000000..dd61195bc --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/NumTraits.h @@ -0,0 +1,246 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NUMTRAITS_H +#define EIGEN_NUMTRAITS_H + +namespace Eigen { + +namespace internal { + +// default implementation of digits10(), based on numeric_limits if specialized, +// 0 for integer types, and log10(epsilon()) otherwise. +template< typename T, + bool use_numeric_limits = std::numeric_limits::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_digits10_impl +{ + static int run() { return std::numeric_limits::digits10; } +}; + +template +struct default_digits10_impl // Floating point +{ + static int run() { + using std::log10; + using std::ceil; + typedef typename NumTraits::Real Real; + return int(ceil(-log10(NumTraits::epsilon()))); + } +}; + +template +struct default_digits10_impl // Integer +{ + static int run() { return 0; } +}; + +} // end namespace internal + +/** \class NumTraits + * \ingroup Core_Module + * + * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. 
+ * + * \tparam T the numeric type at hand + * + * This class stores enums, typedefs and static methods giving information about a numeric type. + * + * The provided data consists of: + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real + * is a typedef to \a U. + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, + * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives + * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to + * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is + * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. If in doubt, just use \a T here. + * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what + * this means, just use \a T here. + * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex + * type, and to 0 otherwise. + * \li An enum value \a IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int, + * and to \c 0 otherwise. + * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed + * by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. + * Stay vague here. No need to do architecture-specific stuff. + * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. + * \li An enum value \a RequireInitialization.
It is equal to \c 1 if the constructor of the numeric type \a T must + * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. + * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), + * it returns a \a Real instead of a \a T. + * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default + * value by the fuzzy comparison operators. + * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + * \li digits10() function returning the number of decimal digits that can be represented without change. This is + * the analogue of std::numeric_limits::digits10 + * which is used as the default implementation if specialized. + */ + +template struct GenericNumTraits +{ + enum { + IsInteger = std::numeric_limits::is_integer, + IsSigned = std::numeric_limits::is_signed, + IsComplex = 0, + RequireInitialization = internal::is_arithmetic::value ? 0 : 1, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + typedef T Real; + typedef typename internal::conditional< + IsInteger, + typename internal::conditional::type, + T + >::type NonInteger; + typedef T Nested; + typedef T Literal; + + EIGEN_DEVICE_FUNC + static inline Real epsilon() + { + return numext::numeric_limits::epsilon(); + } + + EIGEN_DEVICE_FUNC + static inline int digits10() + { + return internal::default_digits10_impl::run(); + } + + EIGEN_DEVICE_FUNC + static inline Real dummy_precision() + { + // make sure to override this for floating-point types + return Real(0); + } + + + EIGEN_DEVICE_FUNC + static inline T highest() { + return (numext::numeric_limits::max)(); + } + + EIGEN_DEVICE_FUNC + static inline T lowest() { + return IsInteger ? 
(numext::numeric_limits::min)() : (-(numext::numeric_limits::max)()); + } + + EIGEN_DEVICE_FUNC + static inline T infinity() { + return numext::numeric_limits::infinity(); + } + + EIGEN_DEVICE_FUNC + static inline T quiet_NaN() { + return numext::numeric_limits::quiet_NaN(); + } +}; + +template struct NumTraits : GenericNumTraits +{}; + +template<> struct NumTraits + : GenericNumTraits +{ + EIGEN_DEVICE_FUNC + static inline float dummy_precision() { return 1e-5f; } +}; + +template<> struct NumTraits : GenericNumTraits +{ + EIGEN_DEVICE_FUNC + static inline double dummy_precision() { return 1e-12; } +}; + +template<> struct NumTraits + : GenericNumTraits +{ + static inline long double dummy_precision() { return 1e-15l; } +}; + +template struct NumTraits > + : GenericNumTraits > +{ + typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; + enum { + IsComplex = 1, + RequireInitialization = NumTraits<_Real>::RequireInitialization, + ReadCost = 2 * NumTraits<_Real>::ReadCost, + AddCost = 2 * NumTraits::AddCost, + MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost + }; + + EIGEN_DEVICE_FUNC + static inline Real epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC + static inline Real dummy_precision() { return NumTraits::dummy_precision(); } + EIGEN_DEVICE_FUNC + static inline int digits10() { return NumTraits::digits10(); } +}; + +template +struct NumTraits > +{ + typedef Array ArrayType; + typedef typename NumTraits::Real RealScalar; + typedef Array Real; + typedef typename NumTraits::NonInteger NonIntegerScalar; + typedef Array NonInteger; + typedef ArrayType & Nested; + typedef typename NumTraits::Literal Literal; + + enum { + IsComplex = NumTraits::IsComplex, + IsInteger = NumTraits::IsInteger, + IsSigned = NumTraits::IsSigned, + RequireInitialization = 1, + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? 
HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost + }; + + EIGEN_DEVICE_FUNC + static inline RealScalar epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC + static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } +}; + +template<> struct NumTraits + : GenericNumTraits +{ + enum { + RequireInitialization = 1, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + + static inline int digits10() { return 0; } + +private: + static inline std::string epsilon(); + static inline std::string dummy_precision(); + static inline std::string lowest(); + static inline std::string highest(); + static inline std::string infinity(); + static inline std::string quiet_NaN(); +}; + +// Empty specialization for void to allow template specialization based on NumTraits::Real with T==void and SFINAE. +template<> struct NumTraits {}; + +} // end namespace Eigen + +#endif // EIGEN_NUMTRAITS_H diff --git a/thirdparty/eigen/Eigen/src/Core/PermutationMatrix.h b/thirdparty/eigen/Eigen/src/Core/PermutationMatrix.h new file mode 100644 index 000000000..b1fb455b9 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/PermutationMatrix.h @@ -0,0 +1,633 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_PERMUTATIONMATRIX_H +#define EIGEN_PERMUTATIONMATRIX_H + +namespace Eigen { + +namespace internal { + +enum PermPermProduct_t {PermPermProduct}; + +} // end namespace internal + +/** \class PermutationBase + * \ingroup Core_Module + * + * \brief Base class for permutations + * + * \tparam Derived the derived class + * + * This class is the base class for all expressions representing a permutation matrix, + * internally stored as a vector of integers. + * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix + * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have: + * \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f] + * This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have: + * \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f] + * + * Permutation matrices are square and invertible. + * + * Notice that in addition to the member functions and operators listed here, there also are non-member + * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase) + * on either side. 
+ * + * \sa class PermutationMatrix, class PermutationWrapper + */ +template +class PermutationBase : public EigenBase +{ + typedef internal::traits Traits; + typedef EigenBase Base; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + enum { + Flags = Traits::Flags, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::StorageIndex StorageIndex; + typedef Matrix + DenseMatrixType; + typedef PermutationMatrix + PlainPermutationType; + typedef PlainPermutationType PlainObject; + using Base::derived; + typedef Inverse InverseReturnType; + typedef void Scalar; + #endif + + /** Copies the other permutation into *this */ + template + Derived& operator=(const PermutationBase& other) + { + indices() = other.indices(); + return derived(); + } + + /** Assignment from the Transpositions \a tr */ + template + Derived& operator=(const TranspositionsBase& tr) + { + setIdentity(tr.size()); + for(Index k=size()-1; k>=0; --k) + applyTranspositionOnTheRight(k,tr.coeff(k)); + return derived(); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. 
+ */ + Derived& operator=(const PermutationBase& other) + { + indices() = other.indices(); + return derived(); + } + #endif + + /** \returns the number of rows */ + inline Index rows() const { return Index(indices().size()); } + + /** \returns the number of columns */ + inline Index cols() const { return Index(indices().size()); } + + /** \returns the size of a side of the respective square matrix, i.e., the number of indices */ + inline Index size() const { return Index(indices().size()); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (Index i=0; i=0 && j>=0 && i=0 && j>=0 && i + void assignTranspose(const PermutationBase& other) + { + for (Index i=0; i + void assignProduct(const Lhs& lhs, const Rhs& rhs) + { + eigen_assert(lhs.cols() == rhs.rows()); + for (Index i=0; i + inline PlainPermutationType operator*(const PermutationBase& other) const + { return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); } + + /** \returns the product of a permutation with another inverse permutation. + * + * \note \blank \note_try_to_help_rvo + */ + template + inline PlainPermutationType operator*(const InverseImpl& other) const + { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } + + /** \returns the product of an inverse permutation with another permutation. + * + * \note \blank \note_try_to_help_rvo + */ + template friend + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) + { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } + + /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. + * + * This function is O(\c n) procedure allocating a buffer of \c n booleans. 
+ */ + Index determinant() const + { + Index res = 1; + Index n = size(); + Matrix mask(n); + mask.fill(false); + Index r = 0; + while(r < n) + { + // search for the next seed + while(r=n) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + mask.coeffRef(k0) = true; + for(Index k=indices().coeff(k0); k!=k0; k=indices().coeff(k)) + { + mask.coeffRef(k) = true; + res = -res; + } + } + return res; + } + + protected: + +}; + +namespace internal { +template +struct traits > + : traits > +{ + typedef PermutationStorage StorageKind; + typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; +}; +} + +/** \class PermutationMatrix + * \ingroup Core_Module + * + * \brief Permutation matrix + * + * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * \tparam _StorageIndex the integer type of the indices + * + * This class represents a permutation matrix, internally stored as a vector of integers. + * + * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix + */ +template +class PermutationMatrix : public PermutationBase > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + typedef const PermutationMatrix& Nested; + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename Traits::StorageIndex StorageIndex; + #endif + + inline PermutationMatrix() + {} + + /** Constructs an uninitialized permutation matrix of given size. + */ + explicit inline PermutationMatrix(Index size) : m_indices(size) + { + eigen_internal_assert(size <= NumTraits::highest()); + } + + /** Copy constructor. 
*/ + template + inline PermutationMatrix(const PermutationBase& other) + : m_indices(other.indices()) {} + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** Standard copy constructor. Defined only to prevent a default copy constructor + * from hiding the other templated constructor */ + inline PermutationMatrix(const PermutationMatrix& other) : m_indices(other.indices()) {} + #endif + + /** Generic constructor from expression of the indices. The indices + * array has the meaning that the permutations sends each integer i to indices[i]. + * + * \warning It is your responsibility to check that the indices array that you passes actually + * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the + * array's size. + */ + template + explicit inline PermutationMatrix(const MatrixBase& indices) : m_indices(indices) + {} + + /** Convert the Transpositions \a tr to a permutation matrix */ + template + explicit PermutationMatrix(const TranspositionsBase& tr) + : m_indices(tr.size()) + { + *this = tr; + } + + /** Copies the other permutation into *this */ + template + PermutationMatrix& operator=(const PermutationBase& other) + { + m_indices = other.indices(); + return *this; + } + + /** Assignment from the Transpositions \a tr */ + template + PermutationMatrix& operator=(const TranspositionsBase& tr) + { + return Base::operator=(tr.derived()); + } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + PermutationMatrix& operator=(const PermutationMatrix& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. 
*/ + IndicesType& indices() { return m_indices; } + + + /**** multiplication helpers to hopefully get RVO ****/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) + { + eigen_internal_assert(m_indices.size() <= NumTraits::highest()); + StorageIndex end = StorageIndex(m_indices.size()); + for (StorageIndex i=0; i + PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) + : m_indices(lhs.indices().size()) + { + Base::assignProduct(lhs,rhs); + } +#endif + + protected: + + IndicesType m_indices; +}; + + +namespace internal { +template +struct traits,_PacketAccess> > + : traits > +{ + typedef PermutationStorage StorageKind; + typedef Map, _PacketAccess> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; +}; +} + +template +class Map,_PacketAccess> + : public PermutationBase,_PacketAccess> > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; + #endif + + inline Map(const StorageIndex* indicesPtr) + : m_indices(indicesPtr) + {} + + inline Map(const StorageIndex* indicesPtr, Index size) + : m_indices(indicesPtr,size) + {} + + /** Copies the other permutation into *this */ + template + Map& operator=(const PermutationBase& other) + { return Base::operator=(other.derived()); } + + /** Assignment from the Transpositions \a tr */ + template + Map& operator=(const TranspositionsBase& tr) + { return Base::operator=(tr.derived()); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Map& operator=(const Map& other) + { + m_indices = other.m_indices; + return *this; + } + #endif + + /** const version of indices(). 
*/ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return m_indices; } + + protected: + + IndicesType m_indices; +}; + +template class TranspositionsWrapper; +namespace internal { +template +struct traits > +{ + typedef PermutationStorage StorageKind; + typedef void Scalar; + typedef typename _IndicesType::Scalar StorageIndex; + typedef _IndicesType IndicesType; + enum { + RowsAtCompileTime = _IndicesType::SizeAtCompileTime, + ColsAtCompileTime = _IndicesType::SizeAtCompileTime, + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 + }; +}; +} + +/** \class PermutationWrapper + * \ingroup Core_Module + * + * \brief Class to view a vector of integers as a permutation matrix + * + * \tparam _IndicesType the type of the vector of integer (can be any compatible expression) + * + * This class allows to view any vector expression of integers as a permutation matrix. + * + * \sa class PermutationBase, class PermutationMatrix + */ +template +class PermutationWrapper : public PermutationBase > +{ + typedef PermutationBase Base; + typedef internal::traits Traits; + public: + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + #endif + + inline PermutationWrapper(const IndicesType& indices) + : m_indices(indices) + {} + + /** const version of indices(). */ + const typename internal::remove_all::type& + indices() const { return m_indices; } + + protected: + + typename IndicesType::Nested m_indices; +}; + + +/** \returns the matrix with the permutation applied to the columns. + */ +template +EIGEN_DEVICE_FUNC +const Product +operator*(const MatrixBase &matrix, + const PermutationBase& permutation) +{ + return Product + (matrix.derived(), permutation.derived()); +} + +/** \returns the matrix with the permutation applied to the rows. 
+ */ +template +EIGEN_DEVICE_FUNC +const Product +operator*(const PermutationBase &permutation, + const MatrixBase& matrix) +{ + return Product + (permutation.derived(), matrix.derived()); +} + + +template +class InverseImpl + : public EigenBase > +{ + typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} + public: + typedef Inverse InverseType; + using EigenBase >::derived; + + #ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename PermutationType::DenseMatrixType DenseMatrixType; + enum { + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime + }; + #endif + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const + { + other.setZero(); + for (Index i=0; i friend + const Product + operator*(const MatrixBase& matrix, const InverseType& trPerm) + { + return Product(matrix.derived(), trPerm.derived()); + } + + /** \returns the matrix with the inverse permutation applied to the rows. + */ + template + const Product + operator*(const MatrixBase& matrix) const + { + return Product(derived(), matrix.derived()); + } +}; + +template +const PermutationWrapper MatrixBase::asPermutation() const +{ + return derived(); +} + +namespace internal { + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/thirdparty/eigen/Eigen/src/Core/PlainObjectBase.h b/thirdparty/eigen/Eigen/src/Core/PlainObjectBase.h new file mode 100644 index 000000000..2dcd929e6 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/PlainObjectBase.h @@ -0,0 +1,1015 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSESTORAGEBASE_H +#define EIGEN_DENSESTORAGEBASE_H + +#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO) +# define EIGEN_INITIALIZE_COEFFS +# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i::quiet_NaN(); +#else +# undef EIGEN_INITIALIZE_COEFFS +# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED +#endif + +namespace Eigen { + +namespace internal { + +template struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE void run(Index, Index) + { + } +}; + +template<> struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC + static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) + { + // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 + // we assume Index is signed + Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed + bool error = (rows == 0 || cols == 0) ? false + : (rows > max_index / cols); + if (error) + throw_std_bad_alloc(); + } +}; + +template +struct conservative_resize_like_impl; + +template struct matrix_swap_impl; + +} // end namespace internal + +/** \class PlainObjectBase + * \ingroup Core_Module + * \brief %Dense storage base class for matrices and arrays. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. + * + * \sa \ref TopicClassHierarchy + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +namespace doxygen { + +// this is a workaround to doxygen not being able to understand the inheritance logic +// when it is hidden by the dense_xpr_base helper struct. 
+/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template struct dense_xpr_base_dispatcher; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher > + : public MatrixBase > {}; +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template +struct dense_xpr_base_dispatcher > + : public ArrayBase > {}; + +} // namespace doxygen + +template +class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher +#else +template +class PlainObjectBase : public internal::dense_xpr_base::type +#endif +{ + public: + enum { Options = internal::traits::Options }; + typedef typename internal::dense_xpr_base::type Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef Derived DenseType; + + using Base::RowsAtCompileTime; + using Base::ColsAtCompileTime; + using Base::SizeAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::IsVectorAtCompileTime; + using Base::Flags; + + template friend class Eigen::Map; + friend class Eigen::Map; + typedef Eigen::Map MapType; + friend class Eigen::Map; + typedef const Eigen::Map ConstMapType; +#if EIGEN_MAX_ALIGN_BYTES>0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. 
+ friend class Eigen::Map; + friend class Eigen::Map; +#endif + typedef Eigen::Map AlignedMapType; + typedef const Eigen::Map ConstAlignedMapType; + template struct StridedMapType { typedef Eigen::Map type; }; + template struct StridedConstMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + + protected: + DenseStorage m_storage; + + public: + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + + EIGEN_DEVICE_FUNC + Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC + const Base& base() const { return *static_cast(this); } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + + /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeff(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. 
*/ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + return m_storage.data()[index]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index,Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index) const for details. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + return m_storage.data()[index]; + } + + /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const + { + if(Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const + { + return m_storage.data()[index]; + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const + { + return internal::ploadt + (m_storage.data() + (Flags & RowMajorBit + ? 
colId + rowId * m_storage.cols() + : rowId + colId * m_storage.rows())); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const + { + return internal::ploadt(m_storage.data() + index); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + internal::pstoret + (m_storage.data() + (Flags & RowMajorBit + ? colId + rowId * m_storage.cols() + : rowId + colId * m_storage.rows()), val); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val) + { + internal::pstoret(m_storage.data() + index, val); + } + + /** \returns a const pointer to the data array of this matrix */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const + { return m_storage.data(); } + + /** \returns a pointer to the data array of this matrix */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() + { return m_storage.data(); } + + /** Resizes \c *this to a \a rows x \a cols matrix. + * + * This method is intended for dynamic-size matrices, although it is legal to call it on any + * matrix as long as fixed dimensions are left unchanged. If you only want to change the number + * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t). + * + * If the current number of coefficients of \c *this exactly matches the + * product \a rows * \a cols, then no memory allocation is performed and + * the current values are left unchanged. In all other cases, including + * shrinking, the data is reallocated and all previous values are lost. 
+ * + * Example: \include Matrix_resize_int_int.cpp + * Output: \verbinclude Matrix_resize_int_int.out + * + * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resize(Index rows, Index cols) + { + eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime) + && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime) + && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); + internal::check_rows_cols_for_overflow::run(rows, cols); + #ifdef EIGEN_INITIALIZE_COEFFS + Index size = rows*cols; + bool size_changed = size != this->size(); + m_storage.resize(size, rows, cols); + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #else + m_storage.resize(rows*cols, rows, cols); + #endif + } + + /** Resizes \c *this to a vector of length \a size + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. 
+ * + * Example: \include Matrix_resize_int.cpp + * Output: \verbinclude Matrix_resize_int.out + * + * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_DEVICE_FUNC + inline void resize(Index size) + { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) + eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0); + #ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); + #endif + if(RowsAtCompileTime == 1) + m_storage.resize(size, 1, size); + else + m_storage.resize(size, size, 1); + #ifdef EIGEN_INITIALIZE_COEFFS + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #endif + } + + /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the special value \c NoChange + * as in the example below. + * + * Example: \include Matrix_resize_NoChange_int.cpp + * Output: \verbinclude Matrix_resize_NoChange_int.out + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + inline void resize(NoChange_t, Index cols) + { + resize(rows(), cols); + } + + /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange + * as in the example below. + * + * Example: \include Matrix_resize_int_NoChange.cpp + * Output: \verbinclude Matrix_resize_int_NoChange.out + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC + inline void resize(Index rows, NoChange_t) + { + resize(rows, cols()); + } + + /** Resizes \c *this to have the same dimensions as \a other. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. 
+ */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) + { + const OtherDerived& other = _other.derived(); + internal::check_rows_cols_for_overflow::run(other.rows(), other.cols()); + const Index othersize = other.rows()*other.cols(); + if(RowsAtCompileTime == 1) + { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(1, othersize); + } + else if(ColsAtCompileTime == 1) + { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(othersize, 1); + } + else resize(other.rows(), other.cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) + { + internal::conservative_resize_like_impl::run(*this, rows, cols); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of columns unchanged. + * + * In case the matrix is growing, new rows will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) + { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows, cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of rows unchanged. + * + * In case the matrix is growing, new columns will be uninitialized. 
+ */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) + { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows(), cols); + } + + /** Resizes the vector to \a size while retaining old values. + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * When values are appended, they will be uninitialized. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index size) + { + internal::conservative_resize_like_impl::run(*this, size); + } + + /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will copied from \c other. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) + { + internal::conservative_resize_like_impl::run(*this, other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. 
+ */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) + { + return _set(other); + } + + /** \sa MatrixBase::lazyAssign() */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase& other) + { + _resize_to_match(other); + return Base::lazyAssign(other.derived()); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue& func) + { + resize(func.rows(), func.cols()); + return Base::operator=(func); + } + + // Prevent user from trying to instantiate PlainObjectBase objects + // by making all its constructor protected. See bug 1074. + protected: + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() + { +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + // FIXME is it still needed ? + /** \internal */ + EIGEN_DEVICE_FUNC + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) + : m_storage(internal::constructor_without_unaligned_array_assert()) + { +// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } +#endif + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT + : m_storage( std::move(other.m_storage) ) + { + } + + EIGEN_DEVICE_FUNC + PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT + { + using std::swap; + swap(m_storage, other.m_storage); + return *this; + } +#endif + + /** Copy constructor */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) + : Base(), m_storage(other.m_storage) { } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) + : m_storage(size, rows, cols) + { +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + 
EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) + : m_storage() + { + _check_template_params(); + resizeLike(other); + _set_noalias(other); + } + + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) + : m_storage() + { + _check_template_params(); + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy constructor with in-place evaluation */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) + { + _check_template_params(); + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + public: + + /** \copydoc DenseBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) + { + _resize_to_match(other); + Base::operator=(other.derived()); + return this->derived(); + } + + /** \name Map + * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects, + * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned + * \a data pointers. 
+ * + * \see class Map + */ + //@{ + static inline ConstMapType Map(const Scalar* data) + { return ConstMapType(data); } + static inline MapType Map(Scalar* data) + { return MapType(data); } + static inline ConstMapType Map(const Scalar* data, Index size) + { return ConstMapType(data, size); } + static inline MapType Map(Scalar* data, Index size) + { return MapType(data, size); } + static inline ConstMapType Map(const Scalar* data, Index rows, Index cols) + { return ConstMapType(data, rows, cols); } + static inline MapType Map(Scalar* data, Index rows, Index cols) + { return MapType(data, rows, cols); } + + static inline ConstAlignedMapType MapAligned(const Scalar* data) + { return ConstAlignedMapType(data); } + static inline AlignedMapType MapAligned(Scalar* data) + { return AlignedMapType(data); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size) + { return ConstAlignedMapType(data, size); } + static inline AlignedMapType MapAligned(Scalar* data, Index size) + { return AlignedMapType(data, size); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols) + { return ConstAlignedMapType(data, rows, cols); } + static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols) + { return AlignedMapType(data, rows, cols); } + + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, const Stride& stride) + { return typename StridedConstMapType >::type(data, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, const Stride& stride) + { return typename StridedMapType >::type(data, stride); } + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, Index size, const Stride& stride) + { return typename StridedConstMapType >::type(data, size, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, Index size, const Stride& stride) + { return typename StridedMapType 
>::type(data, size, stride); } + template + static inline typename StridedConstMapType >::type Map(const Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedConstMapType >::type(data, rows, cols, stride); } + template + static inline typename StridedMapType >::type Map(Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedMapType >::type(data, rows, cols, stride); } + + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, stride); } + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, Index size, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, size, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index size, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, size, stride); } + template + static inline typename StridedConstAlignedMapType >::type MapAligned(const Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedConstAlignedMapType >::type(data, rows, cols, stride); } + template + static inline typename StridedAlignedMapType >::type MapAligned(Scalar* data, Index rows, Index cols, const Stride& stride) + { return typename StridedAlignedMapType >::type(data, rows, cols, stride); } + //@} + + using Base::setConstant; + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val); + + using Base::setZero; + EIGEN_DEVICE_FUNC Derived& setZero(Index size); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index 
cols); + + using Base::setOnes; + EIGEN_DEVICE_FUNC Derived& setOnes(Index size); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols); + + using Base::setRandom; + Derived& setRandom(Index size); + Derived& setRandom(Index rows, Index cols); + + #ifdef EIGEN_PLAINOBJECTBASE_PLUGIN + #include EIGEN_PLAINOBJECTBASE_PLUGIN + #endif + + protected: + /** \internal Resizes *this in preparation for assigning \a other to it. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) + { + #ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert((this->size()==0 || (IsVectorAtCompileTime ? (this->size() == other.size()) + : (rows() == other.rows() && cols() == other.cols()))) + && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + EIGEN_ONLY_USED_FOR_DEBUG(other); + #else + resizeLike(other); + #endif + } + + /** + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + * + * \sa operator=(const MatrixBase&), _set_noalias() + * + * \internal + */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. 
+ template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) + { + internal::call_assignment(this->derived(), other.derived()); + return this->derived(); + } + + /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which + * is the case when creating a new matrix) so one can enforce lazy evaluation. + * + * \sa operator=(const MatrixBase&), _set() + */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) + { + // I don't think we need this resize call since the lazyAssign will anyways resize + // and lazyAssign will be called by the assign selector. + //_resize_to_match(other); + // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because + // it wouldn't allow to copy a row-vector into a column-vector. + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + return this->derived(); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger) && + bool(NumTraits::IsInteger), + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(rows,cols); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==2,T1>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } 
+ + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) + { + // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. + const bool is_integer = NumTraits::IsInteger; + EIGEN_STATIC_ASSERT(is_integer, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(size); + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = val0; + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible::value,T*>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); + } + + // Initialize a fixed size matrix from a pointer to raw data + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar* data){ + this->_set_noalias(ConstMapType(data)); + } + + // Initialize an arbitrary matrix from a dense expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const DenseBase& other){ + 
this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from a generic Eigen expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const EigenBase& other){ + this->derived() = other; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) + { + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const RotationBase& r) + { + this->derived() = r; + } + + // For fixed -size arrays: + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + + template + friend struct internal::matrix_swap_impl; + + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. 
+ */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase & other) + { + enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; + internal::matrix_swap_impl::run(this->derived(), other.derived()); + } + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase const & other) + { Base::swap(other.derived()); } + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void _check_template_params() + { + EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor) + && EIGEN_IMPLIES(MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1, (Options&RowMajor)==0) + && ((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0)) + && ((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0)) + && ((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0)) + && ((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0)) + && (MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime==Dynamic) + && (MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime==Dynamic) + && (Options & (DontAlign|RowMajor)) == Options), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + } + + enum { IsPlainObjectBase = 1 }; +#endif +}; + +namespace internal { + +template +struct conservative_resize_like_impl +{ + static void run(DenseBase& _this, Index rows, Index cols) + { + if (_this.rows() == rows && _this.cols() == cols) return; + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + + if ( ( Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == rows) ) // column-major and we change only the number of columns + { + internal::check_rows_cols_for_overflow::run(rows, cols); + _this.derived().m_storage.conservativeResize(rows*cols,rows,cols); + } + else + { + // The storage order does not allow us to use reallocation. 
+ typename Derived::PlainObject tmp(rows,cols); + const Index common_rows = numext::mini(rows, _this.rows()); + const Index common_cols = numext::mini(cols, _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } + } + + static void run(DenseBase& _this, const DenseBase& other) + { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index), + // neither RowsAtCompileTime or ColsAtCompileTime must be Dynamic. If only one of the + // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or + // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like + // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good. + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived) + + if ( ( Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == other.rows()) ) // column-major and we change only the number of columns + { + const Index new_rows = other.rows() - _this.rows(); + const Index new_cols = other.cols() - _this.cols(); + _this.derived().m_storage.conservativeResize(other.size(),other.rows(),other.cols()); + if (new_rows>0) + _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows); + else if (new_cols>0) + _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols); + } + else + { + // The storage order does not allow us to use reallocation. 
+ typename Derived::PlainObject tmp(other); + const Index common_rows = numext::mini(tmp.rows(), _this.rows()); + const Index common_cols = numext::mini(tmp.cols(), _this.cols()); + tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); + _this.derived().swap(tmp); + } + } +}; + +// Here, the specialization for vectors inherits from the general matrix case +// to allow calling .conservativeResize(rows,cols) on vectors. +template +struct conservative_resize_like_impl + : conservative_resize_like_impl +{ + using conservative_resize_like_impl::run; + + static void run(DenseBase& _this, Index size) + { + const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size; + const Index new_cols = Derived::RowsAtCompileTime==1 ? size : 1; + _this.derived().m_storage.conservativeResize(size,new_rows,new_cols); + } + + static void run(DenseBase& _this, const DenseBase& other) + { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + const Index num_new_elements = other.size() - _this.size(); + + const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : other.rows(); + const Index new_cols = Derived::RowsAtCompileTime==1 ? 
other.cols() : 1; + _this.derived().m_storage.conservativeResize(other.size(),new_rows,new_cols); + + if (num_new_elements > 0) + _this.tail(num_new_elements) = other.tail(num_new_elements); + } +}; + +template +struct matrix_swap_impl +{ + EIGEN_DEVICE_FUNC + static inline void run(MatrixTypeA& a, MatrixTypeB& b) + { + a.base().swap(b); + } +}; + +template +struct matrix_swap_impl +{ + EIGEN_DEVICE_FUNC + static inline void run(MatrixTypeA& a, MatrixTypeB& b) + { + static_cast(a).m_storage.swap(static_cast(b).m_storage); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSESTORAGEBASE_H diff --git a/thirdparty/eigen/Eigen/src/Core/Product.h b/thirdparty/eigen/Eigen/src/Core/Product.h new file mode 100644 index 000000000..ae0c94b38 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Product.h @@ -0,0 +1,186 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +namespace Eigen { + +template class ProductImpl; + +namespace internal { + +template +struct traits > +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type StorageIndex; + + enum { + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit + : NoPreferredStorageOrderBit + }; +}; + +} // end namespace internal + +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \tparam _Lhs the type of the left-hand side expression + * \tparam _Rhs the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. 
+ * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ +template +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> +{ + public: + + typedef _Lhs Lhs; + typedef _Rhs Rhs; + + typedef typename ProductImpl< + Lhs, Rhs, Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + { + eigen_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } + + protected: + + LhsNested m_lhs; + RhsNested m_rhs; +}; + +namespace internal { + +template::ret> +class dense_product_base + : public internal::dense_xpr_base >::type +{}; + +/** Convertion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base >::type +{ + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; +public: + using Base::derived; + typedef typename Base::Scalar Scalar; + + operator const Scalar() const + { + return 
internal::evaluator(derived()).coeff(0,0); + } +}; + +} // namespace internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl + : public internal::dense_product_base +{ + typedef Product Derived; + + public: + + typedef typename internal::dense_product_base Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(i); + } + + +}; + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/thirdparty/eigen/Eigen/src/Core/ProductEvaluators.h b/thirdparty/eigen/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 000000000..c9e2e1a07 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,1101 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. + * + * \sa class product_evaluator + */ +template +struct evaluator > + : public product_evaluator > +{ + typedef Product XprType; + typedef product_evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" +// TODO we should apply that rule only if that's really helpful +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product > > +{ + static const bool value = true; +}; +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product > > + : public evaluator > +{ + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product > XprType; + typedef evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) + {} +}; + + +template +struct evaluator, DiagIndex> > + : public evaluator, DiagIndex> > +{ + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} +}; + + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. 
+template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; + +template +struct evaluator_assume_aliasing > { + static const bool value = true; +}; + +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape> + : public evaluator::PlainObject> +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here?, +// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) +// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +// The following three shortcuts are enabled only if the scalar types match excatly. +// TODO: we could enable them for different scalar types when the product is not vectorized. 
+ +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::add_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + // FIXME shall we handle nested_eval here? + generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::sub_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + // FIXME shall we handle nested_eval here? 
+ generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, + const Product >, AssignFunc, Dense2Dense> +{ + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); + } +}; + +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + +template +struct assignment_from_xpr_op_product +{ + template + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) + { + call_assignment_no_alias(dst, src.lhs(), Func1()); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \ + template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \ + struct Assignment, const OtherXpr, \ + const Product >, internal::ASSIGN_OP, Dense2Dense> \ + : assignment_from_xpr_op_product, internal::ASSIGN_OP, internal::ASSIGN_OP2 > \ + {} + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op); 
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op); + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op); + +//---------------------------------------- + +template +struct generic_product_impl +{ + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } +}; + + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + evaluator rhsEval(rhs); + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dst.cols(); + for (Index j=0; j +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +{ + evaluator lhsEval(lhs); + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dst.rows(); + for (Index i=0; i +struct 
generic_product_impl +{ + template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product::Scalar Scalar; + + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } + + template + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } + +}; + + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base +{ + typedef typename Product::Scalar Scalar; + + template + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& 
lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + +}; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::conditional::type MatrixType; + + template + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::gemv_dense_selector::HasUsableDirectAccess) + >::run(lhs, rhs, dst, alpha); + } +}; + +template +struct generic_product_impl +{ + typedef typename Product::Scalar Scalar; + + template + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + } + + template + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + } + + template + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + } + +// template +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : 
generic_product_impl {}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator, ProductTag, DenseShape, DenseShape> + : evaluator_base > +{ + typedef Product XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. 
+ m_innerDim(xpr.lhs().cols()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif + } + + // Everything below here is taken from CoeffBasedProduct.h + + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + typedef evaluator LhsEtorType; + typedef evaluator RhsEtorType; + + enum { + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime + }; + + typedef typename find_best_packet::type LhsVecPacketType; + typedef typename find_best_packet::type RhsVecPacketType; + + enum { + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost + : InnerSize == Dynamic ? 
HugeCost + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + LhsVecPacketSize = unpacket_traits::size, + RhsVecPacketSize = unpacket_traits::size, + + // Here, we don't care about alignment larger than the usable packet size. + LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), + + SameType = is_same::value, + + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (bool(RhsRowMajor) && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), + + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 
0 : RhsAlignment) + : 0, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % packet_traits::size == 0) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const + { + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector + */ + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const + { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + template + const PacketType packet(Index row, Index col) const + { + PacketType res; + typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + const PacketType packet(Index index) const + { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 
index : 0; + return packet(row,col); + } + +protected: + typename internal::add_const_on_value_type::type m_lhs; + typename internal::add_const_on_value_type::type m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +template +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> +{ + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} +}; + +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet(Index(UnrollingIndex-1), col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet(row, Index(UnrollingIndex-1)), pset1(rhs.coeff(Index(UnrollingIndex-1), col)), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, Index(0))),rhs.template packet(Index(0), col)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, 
const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(typename unpacket_traits::type(0)); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(typename unpacket_traits::type(0)); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + + +/*************************************************************************** +* Triangular products +***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + 
template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template +struct diagonal_product_evaluator_base + : evaluator_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; +public: + enum { + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? 
RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator::Alignment + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! 
+ }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + evaluator m_diagImpl; + evaluator m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> + : diagonal_product_evaluator_base, OnTheLeft> +{ + typedef diagonal_product_evaluator_base, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_impl(row,col, row, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> + : diagonal_product_evaluator_base, OnTheRight> +{ + typedef diagonal_product_evaluator_base, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? 
RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + return this->template packet_impl(row,col, col, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +/** \internal + * \class permutation_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h + */ +template +struct permutation_matrix_product; + +template +struct permutation_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) + { + MatrixType mat(xpr); + const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. 
+ //if(is_same::value && extract_data(dst) == extract_data(mat)) + if(is_same_dense(dst, mat)) + { + // apply the permutation inplace + Matrix mask(perm.size()); + mask.fill(false); + Index r = 0; + while(r < perm.size()) + { + // search for the next seed + while(r=perm.size()) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(Index i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) + + = + + Block + (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); + } + } + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) + { + permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + + +/*************************************************************************** +* Products with transpositions matrices +***************************************************************************/ + +// FIXME could we unify Transpositions and Permutation into a 
single "shape"?? + +/** \internal + * \class transposition_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + */ +template +struct transposition_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) + { + MatrixType mat(xpr); + typedef typename TranspositionType::StorageIndex StorageIndex; + const Index size = tr.size(); + StorageIndex j = 0; + + if(!is_same_dense(dst,mat)) + dst = mat; + + for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, rhs, lhs); + } +}; + + +template +struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + { + transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/thirdparty/eigen/Eigen/src/Core/Random.h b/thirdparty/eigen/Eigen/src/Core/Random.h new file mode 100644 index 000000000..6faf789c7 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Random.h @@ -0,0 +1,182 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RANDOM_H +#define EIGEN_RANDOM_H + +namespace Eigen { + +namespace internal { + +template struct scalar_random_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) + inline const Scalar operator() () const { return random(); } +}; + +template +struct functor_traits > +{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false, IsRepeatable = false }; }; + +} // end namespace internal + +/** \returns a random matrix expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * \not_reentrant + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used + * instead. + * + * + * Example: \include MatrixBase_random_int_int.cpp + * Output: \verbinclude MatrixBase_random_int_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. 
+ * + * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() + */ +template +inline const typename DenseBase::RandomReturnType +DenseBase::Random(Index rows, Index cols) +{ + return NullaryExpr(rows, cols, internal::scalar_random_op()); +} + +/** \returns a random vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * \not_reentrant + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Random() should be used + * instead. + * + * Example: \include MatrixBase_random_int.cpp + * Output: \verbinclude MatrixBase_random_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary vector whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() + */ +template +inline const typename DenseBase::RandomReturnType +DenseBase::Random(Index size) +{ + return NullaryExpr(size, internal::scalar_random_op()); +} + +/** \returns a fixed-size random matrix or vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. 
+ * + * Example: \include MatrixBase_random.cpp + * Output: \verbinclude MatrixBase_random.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \not_reentrant + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) + */ +template +inline const typename DenseBase::RandomReturnType +DenseBase::Random() +{ + return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); +} + +/** Sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * Example: \include MatrixBase_setRandom.cpp + * Output: \verbinclude MatrixBase_setRandom.out + * + * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) + */ +template +inline Derived& DenseBase::setRandom() +{ + return *this = Random(rows(), cols()); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \only_for_vectors + * \not_reentrant + * + * Example: \include Matrix_setRandom_int.cpp + * Output: \verbinclude Matrix_setRandom_int.out + * + * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setRandom(Index newSize) +{ + resize(newSize); + return setRandom(); +} + +/** Resizes to the given size, and sets all coefficients in this expression to random values. 
+ * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setRandom_int_int.cpp + * Output: \verbinclude Matrix_setRandom_int_int.out + * + * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setRandom(Index rows, Index cols) +{ + resize(rows, cols); + return setRandom(); +} + +} // end namespace Eigen + +#endif // EIGEN_RANDOM_H diff --git a/thirdparty/eigen/Eigen/src/Core/Redux.h b/thirdparty/eigen/Eigen/src/Core/Redux.h new file mode 100644 index 000000000..b6e8f8887 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Redux.h @@ -0,0 +1,505 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REDUX_H +#define EIGEN_REDUX_H + +namespace Eigen { + +namespace internal { + +// TODO +// * implement other kind of vectorization +// * factorize code + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for vectorization and unrolling +***************************************************************************/ + +template +struct redux_traits +{ +public: + typedef typename find_best_packet::type PacketType; + enum { + PacketSize = unpacket_traits::size, + InnerMaxSize = int(Derived::IsRowMajor) + ? 
Derived::MaxColsAtCompileTime + : Derived::MaxRowsAtCompileTime + }; + + enum { + MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) + && (functor_traits::PacketAccess), + MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit), + MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize + }; + +public: + enum { + Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(DefaultTraversal) + }; + +public: + enum { + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) + }; + +public: + enum { + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** +* Part 2 : unrollers +***************************************************************************/ + +/*** no vectorization ***/ + +template +struct redux_novec_unroller +{ + enum { + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + return func(redux_novec_unroller::run(mat,func), + 
redux_novec_unroller::run(mat,func)); + } +}; + +template +struct redux_novec_unroller +{ + enum { + outer = Start / Derived::InnerSizeAtCompileTime, + inner = Start % Derived::InnerSizeAtCompileTime + }; + + typedef typename Derived::Scalar Scalar; + + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) + { + return mat.coeffByOuterInner(outer, inner); + } +}; + +// This is actually dead code and will never be called. It is required +// to prevent false warnings regarding failed inlining though +// for 0 length run() will never be called at all. +template +struct redux_novec_unroller +{ + typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } +}; + +/*** vectorization ***/ + +template +struct redux_vec_unroller +{ + enum { + PacketSize = redux_traits::PacketSize, + HalfLength = Length/2 + }; + + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) + { + return func.packetOp( + redux_vec_unroller::run(mat,func), + redux_vec_unroller::run(mat,func) ); + } +}; + +template +struct redux_vec_unroller +{ + enum { + index = Start * redux_traits::PacketSize, + outer = index / int(Derived::InnerSizeAtCompileTime), + inner = index % int(Derived::InnerSizeAtCompileTime), + alignment = Derived::Alignment + }; + + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) + { + return mat.template packetByOuterInner(outer, inner); + } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +template::Traversal, + int Unrolling = 
redux_traits::Unrolling +> +struct redux_impl; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + Scalar res; + res = mat.coeffByOuterInner(0, 0); + for(Index i = 1; i < mat.innerSize(); ++i) + res = func(res, mat.coeffByOuterInner(0, i)); + for(Index i = 1; i < mat.outerSize(); ++i) + for(Index j = 0; j < mat.innerSize(); ++j) + res = func(res, mat.coeffByOuterInner(i, j)); + return res; + } +}; + +template +struct redux_impl + : public redux_novec_unroller +{}; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + static Scalar run(const Derived &mat, const Func& func) + { + const Index size = mat.size(); + + const Index packetSize = redux_traits::PacketSize; + const int packetAlignment = unpacket_traits::alignment; + enum { + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? 
int(packetAlignment) : int(Unaligned), + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) + }; + const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); + const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); + const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); + const Index alignedEnd2 = alignedStart + alignedSize2; + const Index alignedEnd = alignedStart + alignedSize; + Scalar res; + if(alignedSize) + { + PacketScalar packet_res0 = mat.template packet(alignedStart); + if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop + { + PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); + for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) + { + packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); + packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); + } + + packet_res0 = func.packetOp(packet_res0,packet_res1); + if(alignedEnd>alignedEnd2) + packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); + } + res = func.predux(packet_res0); + + for(Index index = 0; index < alignedStart; ++index) + res = func(res,mat.coeff(index)); + + for(Index index = alignedEnd; index < size; ++index) + res = func(res,mat.coeff(index)); + } + else // too small to vectorize anything. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 
+ { + res = mat.coeff(0); + for(Index index = 1; index < size; ++index) + res = func(res,mat.coeff(index)); + } + + return res; + } +}; + +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename redux_traits::PacketType PacketType; + + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + const Index innerSize = mat.innerSize(); + const Index outerSize = mat.outerSize(); + enum { + packetSize = redux_traits::PacketSize + }; + const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; + Scalar res; + if(packetedInnerSize) + { + PacketType packet_res = mat.template packet(0,0); + for(Index j=0; j(j,i)); + + res = func.predux(packet_res); + for(Index j=0; j::run(mat, func); + } + + return res; + } +}; + +template +struct redux_impl +{ + typedef typename Derived::Scalar Scalar; + + typedef typename redux_traits::PacketType PacketScalar; + enum { + PacketSize = redux_traits::PacketSize, + Size = Derived::SizeAtCompileTime, + VectorizedSize = (Size / PacketSize) * PacketSize + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) + { + eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); + if (VectorizedSize > 0) { + Scalar res = func.predux(redux_vec_unroller::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,redux_novec_unroller::run(mat,func)); + return res; + } + else { + return redux_novec_unroller::run(mat,func); + } + } +}; + +// evaluator adaptor +template +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar 
PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator::CoeffReadCost, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template + PacketType packet(Index row, Index col) const + { return m_evaluator.template packet(row, col); } + + template + PacketType packet(Index index) const + { return m_evaluator.template packet(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template + PacketType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? 
inner : outer); } + + const XprType & nestedExpression() const { return m_xpr; } + +protected: + internal::evaluator m_evaluator; + const XprType &m_xpr; +}; + +} // end namespace internal + +/*************************************************************************** +* Part 4 : public API +***************************************************************************/ + + +/** \returns the result of a full redux operation on the whole matrix or vector using \a func + * + * The template parameter \a BinaryOp is the type of the functor \a func which must be + * an associative operator. Both current C++98 and C++11 functor styles are handled. + * + * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() + */ +template +template +typename internal::traits::Scalar +DenseBase::redux(const Func& func) const +{ + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl::run(thisEval, func); +} + +/** \returns the minimum of all coefficients of \c *this. + * \warning the result is undefined if \c *this contains NaN. + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::minCoeff() const +{ + return derived().redux(Eigen::internal::scalar_min_op()); +} + +/** \returns the maximum of all coefficients of \c *this. + * \warning the result is undefined if \c *this contains NaN. + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::maxCoeff() const +{ + return derived().redux(Eigen::internal::scalar_max_op()); +} + +/** \returns the sum of all coefficients of \c *this + * + * If \c *this is empty, then the value 0 is returned. 
+ * + * \sa trace(), prod(), mean() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::sum() const +{ + if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) + return Scalar(0); + return derived().redux(Eigen::internal::scalar_sum_op()); +} + +/** \returns the mean of all coefficients of *this +* +* \sa trace(), prod(), sum() +*/ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::mean() const +{ +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif +} + +/** \returns the product of all coefficients of *this + * + * Example: \include MatrixBase_prod.cpp + * Output: \verbinclude MatrixBase_prod.out + * + * \sa sum(), mean(), trace() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +DenseBase::prod() const +{ + if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) + return Scalar(1); + return derived().redux(Eigen::internal::scalar_product_op()); +} + +/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. + * + * \c *this can be any matrix, not necessarily square. + * + * \sa diagonal(), sum() + */ +template +EIGEN_STRONG_INLINE typename internal::traits::Scalar +MatrixBase::trace() const +{ + return derived().diagonal().sum(); +} + +} // end namespace Eigen + +#endif // EIGEN_REDUX_H diff --git a/thirdparty/eigen/Eigen/src/Core/Ref.h b/thirdparty/eigen/Eigen/src/Core/Ref.h new file mode 100644 index 000000000..bdf24f52a --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Ref.h @@ -0,0 +1,281 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REF_H +#define EIGEN_REF_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : public traits > +{ + typedef _PlainObjectType PlainObjectType; + typedef _StrideType StrideType; + enum { + Options = _Options, + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment + }; + + template struct match { + enum { + HasDirectAccess = internal::has_direct_access::ret, + StorageOrderMatch = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime || ((PlainObjectType::Flags&RowMajorBit)==(Derived::Flags&RowMajorBit)), + InnerStrideMatch = int(StrideType::InnerStrideAtCompileTime)==int(Dynamic) + || int(StrideType::InnerStrideAtCompileTime)==int(Derived::InnerStrideAtCompileTime) + || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), + OuterStrideMatch = Derived::IsVectorAtCompileTime + || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), + // NOTE, this indirection of evaluator::Alignment is needed + // to workaround a very strange bug in MSVC related to the instantiation + // of has_*ary_operator in evaluator. + // This line is surprisingly very sensitive. For instance, simply adding parenthesis + // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... 
+ DerivedAlignment = int(evaluator::Alignment), + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch + }; + typedef typename internal::conditional::type type; + }; + +}; + +template +struct traits > : public traits {}; + +} + +template class RefBase + : public MapBase +{ + typedef typename internal::traits::PlainObjectType PlainObjectType; + typedef typename internal::traits::StrideType StrideType; + +public: + + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) + + EIGEN_DEVICE_FUNC inline Index innerStride() const + { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + EIGEN_DEVICE_FUNC inline Index outerStride() const + { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags)&RowMajorBit ? 
this->cols() + : this->rows(); + } + + EIGEN_DEVICE_FUNC RefBase() + : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), + // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: + m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, + StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime) + {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase) + +protected: + + typedef Stride StrideBase; + + template + EIGEN_DEVICE_FUNC void construct(Expression& expr) + { + if(PlainObjectType::RowsAtCompileTime==1) + { + eigen_assert(expr.rows()==1 || expr.cols()==1); + ::new (static_cast(this)) Base(expr.data(), 1, expr.size()); + } + else if(PlainObjectType::ColsAtCompileTime==1) + { + eigen_assert(expr.rows()==1 || expr.cols()==1); + ::new (static_cast(this)) Base(expr.data(), expr.size(), 1); + } + else + ::new (static_cast(this)) Base(expr.data(), expr.rows(), expr.cols()); + + if(Expression::IsVectorAtCompileTime && (!PlainObjectType::IsVectorAtCompileTime) && ((Expression::Flags&RowMajorBit)!=(PlainObjectType::Flags&RowMajorBit))) + ::new (&m_stride) StrideBase(expr.innerStride(), StrideType::InnerStrideAtCompileTime==0?0:1); + else + ::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(), + StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride()); + } + + StrideBase m_stride; +}; + +/** \class Ref + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing expression + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. 
By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), + * but accepts a variable outer stride (leading dimension). + * This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. + * A Ref<> object can represent either a const expression or a l-value: + * \code + * // in-out argument: + * void foo1(Ref x); + * + * // read-only const argument: + * void foo2(const Ref& x); + * \endcode + * + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. + * Likewise, a Ref can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) + * can be greater than the number of rows. + * + * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. 
+ * Here are some examples: + * \code + * MatrixXf A; + * VectorXf a; + * foo1(a.head()); // OK + * foo1(A.col()); // OK + * foo1(A.row()); // Compilation error because here innerstride!=1 + * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object + * foo2(A.row().transpose()); // The row is copied into a contiguous temporary + * foo2(2*a); // The expression is evaluated into a temporary + * foo2(A.col().segment(2,4)); // No temporary + * \endcode + * + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. + * Here is an example accepting an innerstride!=1: + * \code + * // in-out argument: + * void foo3(Ref > x); + * foo3(A.row()); // OK + * \endcode + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a + * template function, e.g.: + * \code + * // in the .h: + * void foo(const Ref& A); + * void foo(const Ref >& A); + * + * // in the .cpp: + * template void foo_impl(const TypeOfA& A) { + * ... 
// crazy code goes here + * } + * void foo(const Ref& A) { foo_impl(A); } + * void foo(const Ref >& A) { foo_impl(A); } + * \endcode + * + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ +template class Ref + : public RefBase > +{ + private: + typedef internal::traits Traits; + template + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); + public: + + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + { + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + Base::construct(expr.derived()); + } + template + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + #else + /** Implicit constructor from any dense expression */ + template + inline Ref(DenseBase& expr) + #endif + { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + Base::construct(expr.const_cast_derived()); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref) + +}; + +// this is the const ref version +template class Ref + : public RefBase > +{ + typedef internal::traits Traits; + public: + + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + template + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) + { +// std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; +// std::cout << 
int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; +// std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n"; + construct(expr.derived(), typename Traits::template match::type()); + } + + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + template + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + construct(other.derived(), typename Traits::template match::type()); + } + + protected: + + template + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) + { + Base::construct(expr); + } + + template + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) + { + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); + Base::construct(m_object); + } + + protected: + TPlainObjectType m_object; +}; + +} // end namespace Eigen + +#endif // EIGEN_REF_H diff --git a/thirdparty/eigen/Eigen/src/Core/Replicate.h b/thirdparty/eigen/Eigen/src/Core/Replicate.h new file mode 100644 index 000000000..9960ef884 --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Replicate.h @@ -0,0 +1,142 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_REPLICATE_H +#define EIGEN_REPLICATE_H + +namespace Eigen { + +namespace internal { +template +struct traits > + : traits +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + enum { + RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic + ? Dynamic + : RowFactor * MatrixType::RowsAtCompileTime, + ColsAtCompileTime = ColFactor==Dynamic || int(MatrixType::ColsAtCompileTime)==Dynamic + ? Dynamic + : ColFactor * MatrixType::ColsAtCompileTime, + //FIXME we don't propagate the max sizes !!! + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 + : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 + : (MatrixType::Flags & RowMajorBit) ? 1 : 0, + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? RowMajorBit : 0 + }; +}; +} + +/** + * \class Replicate + * \ingroup Core_Module + * + * \brief Expression of the multiple replication of a matrix or vector + * + * \tparam MatrixType the type of the object we are replicating + * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic. + * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic. + * + * This class represents an expression of the multiple replication of a matrix or vector. + * It is the return type of DenseBase::replicate() and most of the time + * this is the only way it is used. 
+ * + * \sa DenseBase::replicate() + */ +template class Replicate + : public internal::dense_xpr_base< Replicate >::type +{ + typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; + typedef typename internal::traits::_MatrixTypeNested _MatrixTypeNested; + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all::type NestedExpression; + + template + EIGEN_DEVICE_FUNC + inline explicit Replicate(const OriginalMatrixType& matrix) + : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) + { + EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + eigen_assert(RowFactor!=Dynamic && ColFactor!=Dynamic); + } + + template + EIGEN_DEVICE_FUNC + inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) + { + EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + } + + EIGEN_DEVICE_FUNC + inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC + inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } + + EIGEN_DEVICE_FUNC + const _MatrixTypeNested& nestedExpression() const + { + return m_matrix; + } + + protected: + MatrixTypeNested m_matrix; + const internal::variable_if_dynamic m_rowFactor; + const internal::variable_if_dynamic m_colFactor; +}; + +/** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate.cpp + * Output: \verbinclude MatrixBase_replicate.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate + */ +template +template +const Replicate +DenseBase::replicate() const +{ + return Replicate(derived()); +} + +/** + * 
\return an expression of the replication of each column (or row) of \c *this + * + * Example: \include DirectionWise_replicate_int.cpp + * Output: \verbinclude DirectionWise_replicate_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ +template +const typename VectorwiseOp::ReplicateReturnType +VectorwiseOp::replicate(Index factor) const +{ + return typename VectorwiseOp::ReplicateReturnType + (_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1); +} + +} // end namespace Eigen + +#endif // EIGEN_REPLICATE_H diff --git a/thirdparty/eigen/Eigen/src/Core/ReturnByValue.h b/thirdparty/eigen/Eigen/src/Core/ReturnByValue.h new file mode 100644 index 000000000..c44b7673b --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/ReturnByValue.h @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// Copyright (C) 2009-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RETURNBYVALUE_H +#define EIGEN_RETURNBYVALUE_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : public traits::ReturnType> +{ + enum { + // We're disabling the DirectAccess because e.g. the constructor of + // the Block-with-DirectAccess expression requires to have a coeffRef method. + // Also, we don't want to have to implement the stride stuff. + Flags = (traits::ReturnType>::Flags + | EvalBeforeNestingBit) & ~DirectAccessBit + }; +}; + +/* The ReturnByValue object doesn't even have a coeff() method. + * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. + * So internal::nested always gives the plain return matrix type. 
+ * + * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators + */ +template +struct nested_eval, n, PlainObject> +{ + typedef typename traits::ReturnType type; +}; + +} // end namespace internal + +/** \class ReturnByValue + * \ingroup Core_Module + * + */ +template class ReturnByValue + : public internal::dense_xpr_base< ReturnByValue >::type, internal::no_assignment_operator +{ + public: + typedef typename internal::traits::ReturnType ReturnType; + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) + + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const + { static_cast(this)->evalTo(dst); } + EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast(this)->cols(); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +#define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT + class Unusable{ + Unusable(const Unusable&) {} + Unusable& operator=(const Unusable&) {return *this;} + }; + const Unusable& coeff(Index) const { return *reinterpret_cast(this); } + const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } + Unusable& coeffRef(Index) { return *reinterpret_cast(this); } + Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } +#undef Unusable +#endif +}; + +template +template +Derived& DenseBase::operator=(const ReturnByValue& other) +{ + other.evalTo(derived()); + return derived(); +} + +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. 
+// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template +struct evaluator > + : public evaluator::ReturnType> +{ + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_RETURNBYVALUE_H diff --git a/thirdparty/eigen/Eigen/src/Core/Reverse.h b/thirdparty/eigen/Eigen/src/Core/Reverse.h new file mode 100644 index 000000000..0640cda2a --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Reverse.h @@ -0,0 +1,211 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009 Ricard Marxer +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_REVERSE_H +#define EIGEN_REVERSE_H + +namespace Eigen { + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type MatrixTypeNested; + typedef typename remove_reference::type _MatrixTypeNested; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) + }; +}; + +template struct reverse_packet_cond +{ + static inline PacketType run(const PacketType& x) { return preverse(x); } +}; + +template struct reverse_packet_cond +{ + static inline PacketType run(const PacketType& x) { return x; } +}; + +} // end namespace internal + +/** \class Reverse + * \ingroup Core_Module + * + * \brief Expression of the reverse of a vector or matrix + * + * \tparam MatrixType the type of the object of which we are taking the reverse + * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections + * + * This class represents an expression of the reverse of a vector. + * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::reverse(), VectorwiseOp::reverse() + */ +template class Reverse + : public internal::dense_xpr_base< Reverse >::type +{ + public: + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all::type NestedExpression; + using Base::IsRowMajor; + + protected: + enum { + PacketSize = internal::packet_traits::size, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor) + }; + typedef internal::reverse_packet_cond reverse_packet; + public: + + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } + + EIGEN_DEVICE_FUNC inline Index innerStride() const + { + return -m_matrix.innerStride(); + } + + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& + nestedExpression() const + { + return m_matrix; + } + + protected: + typename MatrixType::Nested m_matrix; +}; + +/** \returns an expression of the reverse of *this. + * + * Example: \include MatrixBase_reverse.cpp + * Output: \verbinclude MatrixBase_reverse.out + * + */ +template +inline typename DenseBase::ReverseReturnType +DenseBase::reverse() +{ + return ReverseReturnType(derived()); +} + + +//reverse const overload moved DenseBase.h due to a CUDA compiler bug + +/** This is the "in place" version of reverse: it reverses \c *this. 
+ * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * - it allows future optimizations (cache friendliness, etc.) + * + * \sa VectorwiseOp::reverseInPlace(), reverse() */ +template +inline void DenseBase::reverseInPlace() +{ + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } +} + +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. 
However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +void VectorwiseOp::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_REVERSE_H diff --git a/thirdparty/eigen/Eigen/src/Core/Select.h b/thirdparty/eigen/Eigen/src/Core/Select.h new file mode 100644 index 000000000..79eec1b5b --- /dev/null +++ b/thirdparty/eigen/Eigen/src/Core/Select.h @@ -0,0 +1,162 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SELECT_H +#define EIGEN_SELECT_H + +namespace Eigen { + +/** \class Select + * \ingroup Core_Module + * + * \brief Expression of a coefficient wise version of the C++ ternary operator ?: + * + * \param ConditionMatrixType the type of the \em condition expression which must be a boolean matrix + * \param ThenMatrixType the type of the \em then expression + * \param ElseMatrixType the type of the \em else expression + * + * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:. + * It is the return type of DenseBase::select() and most of the time this is the only way it is used. 
+ * + * \sa DenseBase::select(const DenseBase&, const DenseBase&) const + */ + +namespace internal { +template +struct traits > + : traits +{ + typedef typename traits::Scalar Scalar; + typedef Dense StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ConditionMatrixType::Nested ConditionMatrixNested; + typedef typename ThenMatrixType::Nested ThenMatrixNested; + typedef typename ElseMatrixType::Nested ElseMatrixNested; + enum { + RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime, + ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit + }; +}; +} + +template +class Select : public internal::dense_xpr_base< Select >::type, + internal::no_assignment_operator +{ + public: + + typedef typename internal::dense_xpr_base" << endl; + cerr << "available actions:" << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; + exit(1); +} + +int main(int argc, char* argv[]) +{ + cout.precision(default_precision); + cerr.precision(default_precision); + + vector> available_actions; + available_actions.emplace_back(new partition_action_t); + available_actions.emplace_back(new evaluate_defaults_action_t); + + vector input_filenames; + + action_t* action = nullptr; + + if (argc < 2) { + show_usage_and_exit(argc, argv, available_actions); + } + for (int i = 1; i < argc; i++) { + bool arg_handled = false; + // Step 1. Try to match action invokation names. 
+ for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[i], (*it)->invokation_name())) { + if (!action) { + action = it->get(); + arg_handled = true; + break; + } else { + cerr << "can't specify more than one action!" << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + } + if (arg_handled) { + continue; + } + // Step 2. Try to match option names. + if (argv[i][0] == '-') { + if (!strcmp(argv[i], "--only-cubic-sizes")) { + only_cubic_sizes = true; + arg_handled = true; + } + if (!strcmp(argv[i], "--dump-tables")) { + dump_tables = true; + arg_handled = true; + } + if (!arg_handled) { + cerr << "Unrecognized option: " << argv[i] << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + if (arg_handled) { + continue; + } + // Step 3. Default to interpreting args as input filenames. + input_filenames.emplace_back(argv[i]); + } + + if (dump_tables && only_cubic_sizes) { + cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." 
<< endl; + show_usage_and_exit(argc, argv, available_actions); + } + + if (!action) { + show_usage_and_exit(argc, argv, available_actions); + } + + action->run(input_filenames); +} diff --git a/thirdparty/eigen/bench/basicbench.cxxlist b/thirdparty/eigen/bench/basicbench.cxxlist new file mode 100644 index 000000000..a8ab34e0d --- /dev/null +++ b/thirdparty/eigen/bench/basicbench.cxxlist @@ -0,0 +1,28 @@ +#!/bin/bash + +# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG" +# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG -finline-limit=20000" + +# CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG" +#CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000" + +# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG" +#CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000" +# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate" +# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use" + +# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG" +#CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000" +# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate" +# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use" + +# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-genx" +# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-use" + +#CLIST[((g++))]="/opt/intel/Compiler/11.1/072/bin/intel64/icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -lrt" +CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt" +CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -lrt" +CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt" +CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -lrt" +CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt" +CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -lrt" diff --git a/thirdparty/eigen/bench/basicbenchmark.cpp b/thirdparty/eigen/bench/basicbenchmark.cpp new file mode 100644 index 
000000000..a26ea853f --- /dev/null +++ b/thirdparty/eigen/bench/basicbenchmark.cpp @@ -0,0 +1,35 @@ + +#include +#include "BenchUtil.h" +#include "basicbenchmark.h" + +int main(int argc, char *argv[]) +{ + DISABLE_SSE_EXCEPTIONS(); + + // this is the list of matrix type and size we want to bench: + // ((suffix) (matrix size) (number of iterations)) + #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000)) +// #define MODES ((Xd)(20)(10000)) + + #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \ + << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / " + + std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl; + + const int tries = 10; + + #define _RUN_BENCH(R,ARG,EL) \ + std::cout << ARG( \ + BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\ + BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \ + << " "; + + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES ); + std::cout << endl; + BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic, MODES ); + std::cout << endl; + + return 0; +} diff --git a/thirdparty/eigen/bench/basicbenchmark.h b/thirdparty/eigen/bench/basicbenchmark.h new file mode 100644 index 000000000..3fdc35732 --- /dev/null +++ b/thirdparty/eigen/bench/basicbenchmark.h @@ -0,0 +1,63 @@ + +#ifndef EIGEN_BENCH_BASICBENCH_H +#define EIGEN_BENCH_BASICBENCH_H + +enum {LazyEval, EarlyEval, OmpEval}; + +template +void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline)); + +template +void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) +{ + for(int a = 0; a < iterations; a++) + { + if (Mode==LazyEval) + { + asm("#begin_bench_loop LazyEval"); + if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = (I + 0.00005 * (m + m.lazy() * m)).eval(); + } + else if (Mode==OmpEval) + { + asm("#begin_bench_loop OmpEval"); + if 
(MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = (I + 0.00005 * (m + m.lazy() * m)).evalOMP(); + } + else + { + asm("#begin_bench_loop EarlyEval"); + if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize"); + m = I + 0.00005 * (m + m * m); + } + asm("#end_bench_loop"); + } +} + +template +double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline)); + +template +double benchBasic(const MatrixType& mat, int iterations, int tries) +{ + const int rows = mat.rows(); + const int cols = mat.cols(); + + MatrixType I(rows,cols); + MatrixType m(rows,cols); + + initMatrix_identity(I); + + Eigen::BenchTimer timer; + for(uint t=0; t(I, m, iterations); + timer.stop(); + cerr << m; + } + return timer.value(); +}; + +#endif // EIGEN_BENCH_BASICBENCH_H diff --git a/thirdparty/eigen/bench/benchBlasGemm.cpp b/thirdparty/eigen/bench/benchBlasGemm.cpp new file mode 100644 index 000000000..cb086a555 --- /dev/null +++ b/thirdparty/eigen/bench/benchBlasGemm.cpp @@ -0,0 +1,219 @@ +// g++ -O3 -DNDEBUG -I.. 
-L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas +// possible options: +// -DEIGEN_DONT_VECTORIZE +// -msse2 + +// #define EIGEN_DEFAULT_TO_ROW_MAJOR +#define _FLOAT + +#include + +#include +#include "BenchTimer.h" + +// include the BLAS headers +extern "C" { +#include +} +#include + +#ifdef _FLOAT +typedef float Scalar; +#define CBLAS_GEMM cblas_sgemm +#else +typedef double Scalar; +#define CBLAS_GEMM cblas_dgemm +#endif + + +typedef Eigen::Matrix MyMatrix; +void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops); +void check_product(int M, int N, int K); +void check_product(void); + +int main(int argc, char *argv[]) +{ + // disable SSE exceptions + #ifdef __GNUC__ + { + int aux; + asm( + "stmxcsr %[aux] \n\t" + "orl $32832, %[aux] \n\t" + "ldmxcsr %[aux] \n\t" + : : [aux] "m" (aux)); + } + #endif + + int nbtries=1, nbloops=1, M, N, K; + + if (argc==2) + { + if (std::string(argv[1])=="check") + check_product(); + else + M = N = K = atoi(argv[1]); + } + else if ((argc==3) && (std::string(argv[1])=="auto")) + { + M = N = K = atoi(argv[2]); + nbloops = 1000000000/(M*M*M); + if (nbloops<1) + nbloops = 1; + nbtries = 6; + } + else if (argc==4) + { + M = N = K = atoi(argv[1]); + nbloops = atoi(argv[2]); + nbtries = atoi(argv[3]); + } + else if (argc==6) + { + M = atoi(argv[1]); + N = atoi(argv[2]); + K = atoi(argv[3]); + nbloops = atoi(argv[4]); + nbtries = atoi(argv[5]); + } + else + { + std::cout << "Usage: " << argv[0] << " size \n"; + std::cout << "Usage: " << argv[0] << " auto size\n"; + std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n"; + std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n"; + std::cout << "Usage: " << argv[0] << " check\n"; + std::cout << "Options:\n"; + std::cout << " size unique size of the 2 matrices (integer)\n"; + std::cout << " auto automatically set the number of repetitions and tries\n"; + std::cout << " nbloops number of times the GEMM routines is executed\n"; + 
std::cout << " nbtries number of times the loop is benched (return the best try)\n"; + std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n"; + std::cout << " check check eigen product using cblas as a reference\n"; + exit(1); + } + + double nbmad = double(M) * double(N) * double(K) * double(nbloops); + + if (!(std::string(argv[1])=="auto")) + std::cout << M << " x " << N << " x " << K << "\n"; + + Scalar alpha, beta; + MyMatrix ma(M,K), mb(K,N), mc(M,N); + ma = MyMatrix::Random(M,K); + mb = MyMatrix::Random(K,N); + mc = MyMatrix::Random(M,N); + + Eigen::BenchTimer timer; + + // we simply compute c += a*b, so: + alpha = 1; + beta = 1; + + // bench cblas + // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B); + if (!(std::string(argv[1])=="auto")) + { + timer.reset(); + for (uint k=0 ; k(1,64); + N = internal::random(1,768); + K = internal::random(1,768); + M = (0 + M) * 1; + std::cout << M << " x " << N << " x " << K << "\n"; + check_product(M, N, K); + } +} + diff --git a/thirdparty/eigen/bench/benchCholesky.cpp b/thirdparty/eigen/bench/benchCholesky.cpp new file mode 100644 index 000000000..9a8e7cf63 --- /dev/null +++ b/thirdparty/eigen/bench/benchCholesky.cpp @@ -0,0 +1,142 @@ + +// g++ -DNDEBUG -O3 -I.. 
benchLLT.cpp -o benchLLT && ./benchLLT +// options: +// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 +// -DEIGEN_DONT_VECTORIZE +// -msse2 +// -DREPEAT=100 +// -DTRIES=10 +// -DSCALAR=double + +#include + +#include +#include +#include +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 10000 +#endif + +#ifndef TRIES +#define TRIES 10 +#endif + +typedef float Scalar; + +template +__attribute__ ((noinline)) void benchLLT(const MatrixType& m) +{ + int rows = m.rows(); + int cols = m.cols(); + + double cost = 0; + for (int j=0; j SquareMatrixType; + + MatrixType a = MatrixType::Random(rows,cols); + SquareMatrixType covMat = a * a.adjoint(); + + BenchTimer timerNoSqrt, timerSqrt; + + Scalar acc = 0; + int r = internal::random(0,covMat.rows()-1); + int c = internal::random(0,covMat.cols()-1); + for (int t=0; t cholnosqrt(covMat); + acc += cholnosqrt.matrixL().coeff(r,c); + } + timerNoSqrt.stop(); + } + + for (int t=0; t chol(covMat); + acc += chol.matrixL().coeff(r,c); + } + timerSqrt.stop(); + } + + if (MatrixType::RowsAtCompileTime==Dynamic) + std::cout << "dyn "; + else + std::cout << "fixed "; + std::cout << covMat.rows() << " \t" + << (timerNoSqrt.best()) / repeats << "s " + << "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t" + << (timerSqrt.best()) / repeats << "s " + << "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n"; + + + #ifdef BENCH_GSL + if (MatrixType::RowsAtCompileTime==Dynamic) + { + timerSqrt.reset(); + + gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols()); + + eiToGsl(covMat, &gslCovMat); + for (int t=0; t0; ++i) + benchLLT(Matrix(dynsizes[i],dynsizes[i])); + + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + benchLLT(Matrix()); + return 0; +} + diff --git a/thirdparty/eigen/bench/benchEigenSolver.cpp 
b/thirdparty/eigen/bench/benchEigenSolver.cpp new file mode 100644 index 000000000..dd78c7e01 --- /dev/null +++ b/thirdparty/eigen/bench/benchEigenSolver.cpp @@ -0,0 +1,212 @@ + +// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver +// options: +// -DBENCH_GMM +// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3 +// -DEIGEN_DONT_VECTORIZE +// -msse2 +// -DREPEAT=100 +// -DTRIES=10 +// -DSCALAR=double + +#include + +#include +#include +#include +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 1000 +#endif + +#ifndef TRIES +#define TRIES 4 +#endif + +#ifndef SCALAR +#define SCALAR float +#endif + +typedef SCALAR Scalar; + +template +__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m) +{ + int rows = m.rows(); + int cols = m.cols(); + + int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows)))); + int saRepeats = stdRepeats * 4; + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix SquareMatrixType; + + MatrixType a = MatrixType::Random(rows,cols); + SquareMatrixType covMat = a * a.adjoint(); + + BenchTimer timerSa, timerStd; + + Scalar acc = 0; + int r = internal::random(0,covMat.rows()-1); + int c = internal::random(0,covMat.cols()-1); + { + SelfAdjointEigenSolver ei(covMat); + for (int t=0; t ei(covMat); + for (int t=0; t gmmCovMat(covMat.rows(),covMat.cols()); + gmm::dense_matrix eigvect(covMat.rows(),covMat.cols()); + std::vector eigval(covMat.rows()); + eiToGmm(covMat, gmmCovMat); + for (int t=0; t0; ++i) + benchEigenSolver(Matrix(dynsizes[i],dynsizes[i])); + + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + benchEigenSolver(Matrix()); + return 0; +} + diff --git a/thirdparty/eigen/bench/benchFFT.cpp b/thirdparty/eigen/bench/benchFFT.cpp new file mode 100644 index 000000000..3eb1a1ac0 --- /dev/null +++ b/thirdparty/eigen/bench/benchFFT.cpp @@ -0,0 +1,115 @@ 
+// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include +#include +#include +#include + +#include + +using namespace Eigen; +using namespace std; + + +template +string nameof(); + +template <> string nameof() {return "float";} +template <> string nameof() {return "double";} +template <> string nameof() {return "long double";} + +#ifndef TYPE +#define TYPE float +#endif + +#ifndef NFFT +#define NFFT 1024 +#endif +#ifndef NDATA +#define NDATA 1000000 +#endif + +using namespace Eigen; + +template +void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false) +{ + typedef typename NumTraits::Real Scalar; + typedef typename std::complex Complex; + int nits = NDATA/nfft; + vector inbuf(nfft); + vector outbuf(nfft); + FFT< Scalar > fft; + + if (unscaled) { + fft.SetFlag(fft.Unscaled); + cout << "unscaled "; + } + if (halfspec) { + fft.SetFlag(fft.HalfSpectrum); + cout << "halfspec "; + } + + + std::fill(inbuf.begin(),inbuf.end(),0); + fft.fwd( outbuf , inbuf); + + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + if (fwd) + for(int i = 0; i < nits; i++) + fft.fwd( outbuf , inbuf); + else + for(int i = 0; i < nits; i++) + fft.inv(inbuf,outbuf); + timer.stop(); + } + + cout << nameof() << " "; + double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); + if ( NumTraits::IsComplex ) { + cout << "complex"; + }else{ + cout << "real "; + mflops /= 2; + } + + + if (fwd) + cout << " fwd"; + else + cout << " inv"; + + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} + +int main(int argc,char ** argv) +{ + bench >(NFFT,true); + 
bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench(NFFT,false,true); + bench(NFFT,false,true,true); + + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + return 0; +} diff --git a/thirdparty/eigen/bench/benchGeometry.cpp b/thirdparty/eigen/bench/benchGeometry.cpp new file mode 100644 index 000000000..6e16c0331 --- /dev/null +++ b/thirdparty/eigen/bench/benchGeometry.cpp @@ -0,0 +1,134 @@ +#include +#include +#include +#include +#include + +using namespace Eigen; +using namespace std; + +#ifndef REPEAT +#define REPEAT 1000000 +#endif + +enum func_opt +{ + TV, + TMATV, + TMATVMAT, +}; + + +template +struct func; + +template +struct func +{ + static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 ) + { + asm (""); + return a1 * a2; + } +}; + +template +struct func +{ + static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 ) + { + asm (""); + return a1.matrix() * a2; + } +}; + +template +struct func +{ + static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 ) + { + asm (""); + return res(a1.matrix() * a2.matrix()); + } +}; + +template +struct test_transform +{ + static void run() + { + arg1 a1; + a1.setIdentity(); + arg2 a2; + a2.setIdentity(); + + BenchTimer timer; + timer.reset(); + for (int k=0; k<10; ++k) + { + timer.start(); + for (int k=0; k Trans;\ + typedef Matrix Vec;\ + typedef func Func;\ + test_transform< Func, Trans, Vec >::run();\ + } + +#define run_trans( op, scalar, mode, option ) \ + std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \ + {\ + typedef Transform Trans;\ + typedef func Func;\ + test_transform< Func, Trans, Trans >::run();\ + } + +int main(int argc, char* argv[]) +{ + cout << "vec = trans * vec" << endl; + run_vec(TV, float, Isometry, AutoAlign, 3); + run_vec(TV, float, Isometry, DontAlign, 3); + run_vec(TV, float, Isometry, AutoAlign, 4); + run_vec(TV, float, Isometry, DontAlign, 4); + 
run_vec(TV, float, Projective, AutoAlign, 4); + run_vec(TV, float, Projective, DontAlign, 4); + run_vec(TV, double, Isometry, AutoAlign, 3); + run_vec(TV, double, Isometry, DontAlign, 3); + run_vec(TV, double, Isometry, AutoAlign, 4); + run_vec(TV, double, Isometry, DontAlign, 4); + run_vec(TV, double, Projective, AutoAlign, 4); + run_vec(TV, double, Projective, DontAlign, 4); + + cout << "vec = trans.matrix() * vec" << endl; + run_vec(TMATV, float, Isometry, AutoAlign, 4); + run_vec(TMATV, float, Isometry, DontAlign, 4); + run_vec(TMATV, double, Isometry, AutoAlign, 4); + run_vec(TMATV, double, Isometry, DontAlign, 4); + + cout << "trans = trans1 * trans" << endl; + run_trans(TV, float, Isometry, AutoAlign); + run_trans(TV, float, Isometry, DontAlign); + run_trans(TV, double, Isometry, AutoAlign); + run_trans(TV, double, Isometry, DontAlign); + run_trans(TV, float, Projective, AutoAlign); + run_trans(TV, float, Projective, DontAlign); + run_trans(TV, double, Projective, AutoAlign); + run_trans(TV, double, Projective, DontAlign); + + cout << "trans = trans1.matrix() * trans.matrix()" << endl; + run_trans(TMATVMAT, float, Isometry, AutoAlign); + run_trans(TMATVMAT, float, Isometry, DontAlign); + run_trans(TMATVMAT, double, Isometry, AutoAlign); + run_trans(TMATVMAT, double, Isometry, DontAlign); +} + diff --git a/thirdparty/eigen/bench/benchVecAdd.cpp b/thirdparty/eigen/bench/benchVecAdd.cpp new file mode 100644 index 000000000..ce8e1e911 --- /dev/null +++ b/thirdparty/eigen/bench/benchVecAdd.cpp @@ -0,0 +1,135 @@ + +#include +#include +#include +using namespace Eigen; + +#ifndef SIZE +#define SIZE 50 +#endif + +#ifndef REPEAT +#define REPEAT 10000 +#endif + +typedef float Scalar; + +__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); +__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); +__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); + +int main(int argc, char* argv[]) +{ 
+ int size = SIZE * 8; + int size2 = size * size; + Scalar* a = internal::aligned_new(size2); + Scalar* b = internal::aligned_new(size2+4)+1; + Scalar* c = internal::aligned_new(size2); + + for (int i=0; i2 ; --innersize) + { + if (size2%innersize==0) + { + int outersize = size2/innersize; + MatrixXf ma = Map(a, innersize, outersize ); + MatrixXf mb = Map(b, innersize, outersize ); + MatrixXf mc = Map(c, innersize, outersize ); + timer.reset(); + for (int k=0; k<3; ++k) + { + timer.start(); + benchVec(ma, mb, mc); + timer.stop(); + } + std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; + } + } + + VectorXf va = Map(a, size2); + VectorXf vb = Map(b, size2); + VectorXf vc = Map(c, size2); + timer.reset(); + for (int k=0; k<3; ++k) + { + timer.start(); + benchVec(va, vb, vc); + timer.stop(); + } + std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) 
<< " GFlops\n"; + + return 0; +} + +void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) +{ + for (int k=0; k::type PacketScalar; + const int PacketSize = internal::packet_traits::size; + PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; + for (int k=0; k +// -DSCALARA=double or -DSCALARB=double +// -DHAVE_BLAS +// -DDECOUPLED +// + +#include +#include +#include + +using namespace std; +using namespace Eigen; + +#ifndef SCALAR +// #define SCALAR std::complex +#define SCALAR float +#endif + +#ifndef SCALARA +#define SCALARA SCALAR +#endif + +#ifndef SCALARB +#define SCALARB SCALAR +#endif + +typedef SCALAR Scalar; +typedef NumTraits::Real RealScalar; +typedef Matrix A; +typedef Matrix B; +typedef Matrix C; +typedef Matrix M; + +#ifdef HAVE_BLAS + +extern "C" { + #include +} + +static float fone = 1; +static float fzero = 0; +static double done = 1; +static double szero = 0; +static std::complex cfone = 1; +static std::complex cfzero = 0; +static std::complex cdone = 1; +static std::complex cdzero = 0; +static char notrans = 'N'; +static char trans = 'T'; +static char nonunit = 'N'; +static char lower = 'L'; +static char right = 'R'; +static int intone = 1; + +void blas_gemm(const MatrixXf& a, const MatrixXf& b, MatrixXf& c) +{ + int M = c.rows(); int N = c.cols(); int K = a.cols(); + int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows(); + + sgemm_(¬rans,¬rans,&M,&N,&K,&fone, + const_cast(a.data()),&lda, + const_cast(b.data()),&ldb,&fone, + c.data(),&ldc); +} + +EIGEN_DONT_INLINE void blas_gemm(const MatrixXd& a, const MatrixXd& b, MatrixXd& c) +{ + int M = c.rows(); int N = c.cols(); int K = a.cols(); + int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows(); + + dgemm_(¬rans,¬rans,&M,&N,&K,&done, + const_cast(a.data()),&lda, + const_cast(b.data()),&ldb,&done, + c.data(),&ldc); +} + +void blas_gemm(const MatrixXcf& a, const MatrixXcf& b, MatrixXcf& c) +{ + int M = c.rows(); int N = c.cols(); int K = a.cols(); + int lda = a.rows(); int ldb = b.rows(); int ldc = 
c.rows(); + + cgemm_(¬rans,¬rans,&M,&N,&K,(float*)&cfone, + const_cast((const float*)a.data()),&lda, + const_cast((const float*)b.data()),&ldb,(float*)&cfone, + (float*)c.data(),&ldc); +} + +void blas_gemm(const MatrixXcd& a, const MatrixXcd& b, MatrixXcd& c) +{ + int M = c.rows(); int N = c.cols(); int K = a.cols(); + int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows(); + + zgemm_(¬rans,¬rans,&M,&N,&K,(double*)&cdone, + const_cast((const double*)a.data()),&lda, + const_cast((const double*)b.data()),&ldb,(double*)&cdone, + (double*)c.data(),&ldc); +} + + + +#endif + +void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci) +{ + cr.noalias() += ar * br; + cr.noalias() -= ai * bi; + ci.noalias() += ar * bi; + ci.noalias() += ai * br; +} + +void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci) +{ + cr.noalias() += a * br; + ci.noalias() += a * bi; +} + +void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci) +{ + cr.noalias() += ar * b; + ci.noalias() += ai * b; +} + +template +EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) +{ + c.noalias() += a * b; +} + +int main(int argc, char ** argv) +{ + std::ptrdiff_t l1 = internal::queryL1CacheSize(); + std::ptrdiff_t l2 = internal::queryTopLevelCacheSize(); + std::cout << "L1 cache size = " << (l1>0 ? l1/1024 : -1) << " KB\n"; + std::cout << "L2/L3 cache size = " << (l2>0 ? 
l2/1024 : -1) << " KB\n"; + typedef internal::gebp_traits Traits; + std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n"; + + int rep = 1; // number of repetitions per try + int tries = 2; // number of tries, we keep the best + + int s = 2048; + int m = s; + int n = s; + int p = s; + int cache_size1=-1, cache_size2=l2, cache_size3 = 0; + + bool need_help = false; + for (int i=1; i -c -t -p \n"; + std::cout << " : size\n"; + std::cout << " : rows columns depth\n"; + return 1; + } + +#if EIGEN_VERSION_AT_LEAST(3,2,90) + if(cache_size1>0) + setCpuCacheSizes(cache_size1,cache_size2,cache_size3); +#endif + + A a(m,p); a.setRandom(); + B b(p,n); b.setRandom(); + C c(m,n); c.setOnes(); + C rc = c; + + std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n"; + std::ptrdiff_t mc(m), nc(n), kc(p); + internal::computeProductBlockingSizes(kc, mc, nc); + std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n"; + + C r = c; + + // check the parallel product is correct + #if defined EIGEN_HAS_OPENMP + Eigen::initParallel(); + int procs = omp_get_max_threads(); + if(procs>1) + { + #ifdef HAVE_BLAS + blas_gemm(a,b,r); + #else + omp_set_num_threads(1); + r.noalias() += a * b; + omp_set_num_threads(procs); + #endif + c.noalias() += a * b; + if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n"; + } + #elif defined HAVE_BLAS + blas_gemm(a,b,r); + c.noalias() += a * b; + if(!r.isApprox(c)) { + std::cout << r - c << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } + #else + if(1.*m*n*p<2000.*2000*2000) + { + gemm(a,b,c); + r.noalias() += a.cast() .lazyProduct( b.cast() ); + if(!r.isApprox(c)) { + std::cout << r - c << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } + } + #endif + + #ifdef HAVE_BLAS + BenchTimer tblas; + c = rc; + BENCH(tblas, tries, rep, blas_gemm(a,b,c)); + std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << 
(double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n"; + std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n"; + #endif + + BenchTimer tmt; + c = rc; + BENCH(tmt, tries, rep, gemm(a,b,c)); + std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; + std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; + + #ifdef EIGEN_HAS_OPENMP + if(procs>1) + { + BenchTimer tmono; + omp_set_num_threads(1); + Eigen::setNbThreads(1); + c = rc; + BENCH(tmono, tries, rep, gemm(a,b,c)); + std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n"; + std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n"; + std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n"; + } + #endif + + if(1.*m*n*p<30*30*30) + { + BenchTimer tmt; + c = rc; + BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b)); + std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; + std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; + } + + #ifdef DECOUPLED + if((NumTraits::IsComplex) && (NumTraits::IsComplex)) + { + M ar(m,p); ar.setRandom(); + M ai(m,p); ai.setRandom(); + M 
br(p,n); br.setRandom(); + M bi(p,n); bi.setRandom(); + M cr(m,n); cr.setRandom(); + M ci(m,n); ci.setRandom(); + + BenchTimer t; + BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci)); + std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; + std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; + } + if((!NumTraits::IsComplex) && (NumTraits::IsComplex)) + { + M a(m,p); a.setRandom(); + M br(p,n); br.setRandom(); + M bi(p,n); bi.setRandom(); + M cr(m,n); cr.setRandom(); + M ci(m,n); ci.setRandom(); + + BenchTimer t; + BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci)); + std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; + std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; + } + if((NumTraits::IsComplex) && (!NumTraits::IsComplex)) + { + M ar(m,p); ar.setRandom(); + M ai(m,p); ai.setRandom(); + M b(p,n); b.setRandom(); + M cr(m,n); cr.setRandom(); + M ci(m,n); ci.setRandom(); + + BenchTimer t; + BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci)); + std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n"; + std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n"; + } + #endif + + return 0; +} + diff --git a/thirdparty/eigen/bench/bench_multi_compilers.sh b/thirdparty/eigen/bench/bench_multi_compilers.sh new file mode 100755 index 000000000..27e91f1d5 --- /dev/null +++ 
b/thirdparty/eigen/bench/bench_multi_compilers.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +if (($# < 2)); then + echo "Usage: $0 compilerlist.txt benchfile.cpp" +else + +compilerlist=$1 +benchfile=$2 + +g=0 +source $compilerlist + +# for each compiler, compile benchfile and run the benchmark +for (( i=0 ; i /dev/null + echo "" + else + echo "compiler not found: $compiler" + fi +done + +fi diff --git a/thirdparty/eigen/bench/bench_norm.cpp b/thirdparty/eigen/bench/bench_norm.cpp new file mode 100644 index 000000000..129afcfb2 --- /dev/null +++ b/thirdparty/eigen/bench/bench_norm.cpp @@ -0,0 +1,360 @@ +#include +#include +#include +#include "BenchTimer.h" +using namespace Eigen; +using namespace std; + +template +EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) +{ + return v.norm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) +{ + return v.stableNorm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) +{ + return v.hypotNorm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) +{ + return v.blueNorm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) +{ + typedef typename T::Scalar Scalar; + int n = v.size(); + Scalar scale = 0; + Scalar ssq = 1; + for (int i=0;i= ax) + { + ssq += numext::abs2(ax/scale); + } + else + { + ssq = Scalar(1) + ssq * numext::abs2(scale/ax); + scale = ax; + } + } + return scale * std::sqrt(ssq); +} + +template +EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) +{ + typedef typename T::Scalar Scalar; + Scalar s = v.array().abs().maxCoeff(); + return s*(v/s).norm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v) +{ + return v.stableNorm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) +{ + int n =v.size() / 2; + for (int i=0;i0) + { + for (int i=0;i +EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) +{ + #ifndef EIGEN_VECTORIZE + return v.blueNorm(); + #else + typedef typename T::Scalar Scalar; + + 
static int nmax = 0; + static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; + int n; + + if(nmax <= 0) + { + int nbig, ibeta, it, iemin, iemax, iexp; + Scalar abig, eps; + + nbig = std::numeric_limits::max(); // largest integer + ibeta = std::numeric_limits::radix; //NumTraits::Base; // base for floating-point numbers + it = std::numeric_limits::digits; //NumTraits::Mantissa; // number of base-beta digits in mantissa + iemin = std::numeric_limits::min_exponent; // minimum exponent + iemax = std::numeric_limits::max_exponent; // maximum exponent + rbig = std::numeric_limits::max(); // largest floating-point number + + // Check the basic machine-dependent constants. + if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) + || (it<=4 && ibeta <= 3 ) || it<2) + { + eigen_assert(false && "the algorithm cannot be guaranteed on this computer"); + } + iexp = -((1-iemin)/2); + b1 = std::pow(ibeta, iexp); // lower boundary of midrange + iexp = (iemax + 1 - it)/2; + b2 = std::pow(ibeta,iexp); // upper boundary of midrange + + iexp = (2-iemin)/2; + s1m = std::pow(ibeta,iexp); // scaling factor for lower range + iexp = - ((iemax+it)/2); + s2m = std::pow(ibeta,iexp); // scaling factor for upper range + + overfl = rbig*s2m; // overfow boundary for abig + eps = std::pow(ibeta, 1-it); + relerr = std::sqrt(eps); // tolerance for neglecting asml + abig = 1.0/eps - 1.0; + if (Scalar(nbig)>abig) nmax = abig; // largest safe n + else nmax = nbig; + } + + typedef typename internal::packet_traits::type Packet; + const int ps = internal::packet_traits::size; + Packet pasml = internal::pset1(Scalar(0)); + Packet pamed = internal::pset1(Scalar(0)); + Packet pabig = internal::pset1(Scalar(0)); + Packet ps2m = internal::pset1(s2m); + Packet ps1m = internal::pset1(s1m); + Packet pb2 = internal::pset1(b2); + Packet pb1 = internal::pset1(b1); + for(int j=0; j(j)); + Packet ax_s2m = internal::pmul(ax,ps2m); + Packet ax_s1m = internal::pmul(ax,ps1m); + Packet maskBig = internal::plt(pb2,ax); + 
Packet maskSml = internal::plt(ax,pb1); + +// Packet maskMed = internal::pand(maskSml,maskBig); +// Packet scale = internal::pset1(Scalar(0)); +// scale = internal::por(scale, internal::pand(maskBig,ps2m)); +// scale = internal::por(scale, internal::pand(maskSml,ps1m)); +// scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed)); +// ax = internal::pmul(ax,scale); +// ax = internal::pmul(ax,ax); +// pabig = internal::padd(pabig, internal::pand(maskBig, ax)); +// pasml = internal::padd(pasml, internal::pand(maskSml, ax)); +// pamed = internal::padd(pamed, internal::pandnot(ax,maskMed)); + + + pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m))); + pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m))); + pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig))); + } + Scalar abig = internal::predux(pabig); + Scalar asml = internal::predux(pasml); + Scalar amed = internal::predux(pamed); + if(abig > Scalar(0)) + { + abig = std::sqrt(abig); + if(abig > overfl) + { + eigen_assert(false && "overflow"); + return rbig; + } + if(amed > Scalar(0)) + { + abig = abig/s2m; + amed = std::sqrt(amed); + } + else + { + return abig/s2m; + } + + } + else if(asml > Scalar(0)) + { + if (amed > Scalar(0)) + { + abig = std::sqrt(amed); + amed = std::sqrt(asml) / s1m; + } + else + { + return std::sqrt(asml)/s1m; + } + } + else + { + return std::sqrt(amed); + } + asml = std::min(abig, amed); + abig = std::max(abig, amed); + if(asml <= abig*relerr) + return abig; + else + return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig)); + #endif +} + +#define BENCH_PERF(NRM) { \ + float af = 0; double ad = 0; std::complex ac = 0; \ + Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\ + for (int k=0; k()); + double yd = based * std::abs(internal::random()); + VectorXf vf = VectorXf::Ones(s) * yf; + VectorXd vd = VectorXd::Ones(s) * yd; + + 
std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n"; + std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; + std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; + std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n"; + std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n"; + std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n"; + std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n"; + std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n"; +} + +void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) +{ + VectorXf vf(s); + VectorXd vd(s); + for (int i=0; i()) * std::pow(double(10), internal::random(ef0,ef1)); + vd[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ed0,ed1)); + } + + //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; + std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast()) << "\t" << sqsumNorm(vd.cast()) << "\n"; + std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast()) << "\t" << hypotNorm(vd.cast()) << "\n"; + std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast()) << "\t" << blueNorm(vd.cast()) << "\n"; + std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast()) << "\t" << lapackNorm(vd.cast()) << "\n"; + std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast()) << "\t" << twopassNorm(vd.cast()) << "\n"; +// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << 
bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast()) << "\t" << bl2passNorm(vd.cast()) << "\n"; +} + +int main(int argc, char** argv) +{ + int tries = 10; + int iters = 100000; + double y = 1.1345743233455785456788e12 * internal::random(); + VectorXf v = VectorXf::Ones(1024) * y; + +// return 0; + int s = 10000; + double basef_ok = 1.1345743233455785456788e15; + double based_ok = 1.1345743233455785456788e95; + + double basef_under = 1.1345743233455785456788e-27; + double based_under = 1.1345743233455785456788e-303; + + double basef_over = 1.1345743233455785456788e+27; + double based_over = 1.1345743233455785456788e+302; + + std::cout.precision(20); + + std::cerr << "\nNo under/overflow:\n"; + check_accuracy(basef_ok, based_ok, s); + + std::cerr << "\nUnderflow:\n"; + check_accuracy(basef_under, based_under, s); + + std::cerr << "\nOverflow:\n"; + check_accuracy(basef_over, based_over, s); + + std::cerr << "\nVarying (over):\n"; + for (int k=0; k<1; ++k) + { + check_accuracy_var(20,27,190,302,s); + std::cout << "\n"; + } + + std::cerr << "\nVarying (under):\n"; + for (int k=0; k<1; ++k) + { + check_accuracy_var(-27,20,-302,-190,s); + std::cout << "\n"; + } + + y = 1; + std::cout.precision(4); + int s1 = 1024*1024*32; + std::cerr << "Performance (out of cache, " << s1 << "):\n"; + { + int iters = 1; + VectorXf vf = VectorXf::Random(s1) * y; + VectorXd vd = VectorXd::Random(s1) * y; + VectorXcf vcf = VectorXcf::Random(s1) * y; + BENCH_PERF(sqsumNorm); + BENCH_PERF(stableNorm); + BENCH_PERF(blueNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + BENCH_PERF(twopassNorm); + BENCH_PERF(bl2passNorm); + } + + std::cerr << "\nPerformance (in cache, " << 512 << "):\n"; + { + int iters = 100000; + VectorXf vf = VectorXf::Random(512) * y; + VectorXd vd = VectorXd::Random(512) * y; + VectorXcf vcf = VectorXcf::Random(512) * y; + BENCH_PERF(sqsumNorm); + BENCH_PERF(stableNorm); + BENCH_PERF(blueNorm); + BENCH_PERF(pblueNorm); + 
BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + BENCH_PERF(twopassNorm); + BENCH_PERF(bl2passNorm); + } +} diff --git a/thirdparty/eigen/bench/bench_reverse.cpp b/thirdparty/eigen/bench/bench_reverse.cpp new file mode 100644 index 000000000..1e69ca1b2 --- /dev/null +++ b/thirdparty/eigen/bench/bench_reverse.cpp @@ -0,0 +1,84 @@ + +#include +#include +#include +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 100000 +#endif + +#ifndef TRIES +#define TRIES 20 +#endif + +typedef double Scalar; + +template +__attribute__ ((noinline)) void bench_reverse(const MatrixType& m) +{ + int rows = m.rows(); + int cols = m.cols(); + int size = m.size(); + + int repeats = (REPEAT*1000)/size; + MatrixType a = MatrixType::Random(rows,cols); + MatrixType b = MatrixType::Random(rows,cols); + + BenchTimer timerB, timerH, timerV; + + Scalar acc = 0; + int r = internal::random(0,rows-1); + int c = internal::random(0,cols-1); + for (int t=0; t0; ++i) + { + bench_reverse(Matrix(dynsizes[i],dynsizes[i])); + bench_reverse(Matrix(dynsizes[i]*dynsizes[i])); + } +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); +// bench_reverse(Matrix()); + return 0; +} + diff --git a/thirdparty/eigen/bench/bench_sum.cpp b/thirdparty/eigen/bench/bench_sum.cpp new file mode 100644 index 000000000..a3d925e4f --- /dev/null +++ b/thirdparty/eigen/bench/bench_sum.cpp @@ -0,0 +1,18 @@ +#include +#include +using namespace Eigen; +using namespace std; + +int main() +{ + typedef Matrix Vec; + Vec v(SIZE); + v.setZero(); + v[0] = 1; + v[1] = 2; + for(int i = 0; i < 1000000; i++) + { + v.coeffRef(0) += v.sum() * SCALAR(1e-20); + } + cout << v.sum() << endl; +} diff --git a/thirdparty/eigen/bench/bench_unrolling b/thirdparty/eigen/bench/bench_unrolling new file mode 100755 index 000000000..826443845 --- /dev/null +++ 
b/thirdparty/eigen/bench/bench_unrolling @@ -0,0 +1,12 @@ +#!/bin/bash + +# gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000" +# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions" +CXX=${CXX-g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value + +for ((i=1; i<16; ++i)); do + echo "Matrix size: $i x $i :" + $CXX -O3 -I.. -DNDEBUG benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null + $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null + echo " " +done diff --git a/thirdparty/eigen/bench/benchmark-blocking-sizes.cpp b/thirdparty/eigen/bench/benchmark-blocking-sizes.cpp new file mode 100644 index 000000000..827be2880 --- /dev/null +++ b/thirdparty/eigen/bench/benchmark-blocking-sizes.cpp @@ -0,0 +1,677 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include +#include +#include +#include +#include +#include + +bool eigen_use_specific_block_size; +int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n; +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n +#include + +#include + +using namespace Eigen; +using namespace std; + +static BenchTimer timer; + +// how many times we repeat each measurement. +// measurements are randomly shuffled - we're not doing +// all N identical measurements in a row. 
+const int measurement_repetitions = 3; + +// Timings below this value are too short to be accurate, +// we'll repeat measurements with more iterations until +// we get a timing above that threshold. +const float min_accurate_time = 1e-2f; + +// See --min-working-set-size command line parameter. +size_t min_working_set_size = 0; + +float max_clock_speed = 0.0f; + +// range of sizes that we will benchmark (in all 3 K,M,N dimensions) +const size_t maxsize = 2048; +const size_t minsize = 16; + +typedef MatrixXf MatrixType; +typedef MatrixType::Scalar Scalar; +typedef internal::packet_traits::type Packet; + +static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two"); +static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two"); +static_assert(maxsize > minsize, "maxsize must be larger than minsize"); +static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)"); + +// just a helper to store a triple of K,M,N sizes for matrix product +struct size_triple_t +{ + size_t k, m, n; + size_triple_t() : k(0), m(0), n(0) {} + size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} + size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} + size_triple_t(uint16_t compact) + { + k = 1 << ((compact & 0xf00) >> 8); + m = 1 << ((compact & 0x0f0) >> 4); + n = 1 << ((compact & 0x00f) >> 0); + } +}; + +uint8_t log2_pot(size_t x) { + size_t l = 0; + while (x >>= 1) l++; + return l; +} + +// Convert between size tripes and a compact form fitting in 12 bits +// where each size, which must be a POT, is encoded as its log2, on 4 bits +// so the largest representable size is 2^15 == 32k ... big enough. +uint16_t compact_size_triple(size_t k, size_t m, size_t n) +{ + return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); +} + +uint16_t compact_size_triple(const size_triple_t& t) +{ + return compact_size_triple(t.k, t.m, t.n); +} + +// A single benchmark. Initially only contains benchmark params. 
+// Then call run(), which stores the result in the gflops field. +struct benchmark_t +{ + uint16_t compact_product_size; + uint16_t compact_block_size; + bool use_default_block_size; + float gflops; + benchmark_t() + : compact_product_size(0) + , compact_block_size(0) + , use_default_block_size(false) + , gflops(0) + { + } + benchmark_t(size_t pk, size_t pm, size_t pn, + size_t bk, size_t bm, size_t bn) + : compact_product_size(compact_size_triple(pk, pm, pn)) + , compact_block_size(compact_size_triple(bk, bm, bn)) + , use_default_block_size(false) + , gflops(0) + {} + benchmark_t(size_t pk, size_t pm, size_t pn) + : compact_product_size(compact_size_triple(pk, pm, pn)) + , compact_block_size(0) + , use_default_block_size(true) + , gflops(0) + {} + + void run(); +}; + +ostream& operator<<(ostream& s, const benchmark_t& b) +{ + s << hex << b.compact_product_size << dec; + if (b.use_default_block_size) { + size_triple_t t(b.compact_product_size); + Index k = t.k, m = t.m, n = t.n; + internal::computeProductBlockingSizes(k, m, n); + s << " default(" << k << ", " << m << ", " << n << ")"; + } else { + s << " " << hex << b.compact_block_size << dec; + } + s << " " << b.gflops; + return s; +} + +// We sort first by increasing benchmark parameters, +// then by decreasing performance. 
+bool operator<(const benchmark_t& b1, const benchmark_t& b2) +{ + return b1.compact_product_size < b2.compact_product_size || + (b1.compact_product_size == b2.compact_product_size && ( + (b1.compact_block_size < b2.compact_block_size || ( + b1.compact_block_size == b2.compact_block_size && + b1.gflops > b2.gflops)))); +} + +void benchmark_t::run() +{ + size_triple_t productsizes(compact_product_size); + + if (use_default_block_size) { + eigen_use_specific_block_size = false; + } else { + // feed eigen with our custom blocking params + eigen_use_specific_block_size = true; + size_triple_t blocksizes(compact_block_size); + eigen_block_size_k = blocksizes.k; + eigen_block_size_m = blocksizes.m; + eigen_block_size_n = blocksizes.n; + } + + // set up the matrix pool + + const size_t combined_three_matrices_sizes = + sizeof(Scalar) * + (productsizes.k * productsizes.m + + productsizes.k * productsizes.n + + productsizes.m * productsizes.n); + + // 64 M is large enough that nobody has a cache bigger than that, + // while still being small enough that everybody has this much RAM, + // so conveniently we don't need to special-case platforms here. + const size_t unlikely_large_cache_size = 64 << 20; + + const size_t working_set_size = + min_working_set_size ? 
min_working_set_size : unlikely_large_cache_size; + + const size_t matrix_pool_size = + 1 + working_set_size / combined_three_matrices_sizes; + + MatrixType *lhs = new MatrixType[matrix_pool_size]; + MatrixType *rhs = new MatrixType[matrix_pool_size]; + MatrixType *dst = new MatrixType[matrix_pool_size]; + + for (size_t i = 0; i < matrix_pool_size; i++) { + lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k); + rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n); + dst[i] = MatrixType::Zero(productsizes.m, productsizes.n); + } + + // main benchmark loop + + int iters_at_a_time = 1; + float time_per_iter = 0.0f; + size_t matrix_index = 0; + while (true) { + + double starttime = timer.getCpuTime(); + for (int i = 0; i < iters_at_a_time; i++) { + dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index]; + matrix_index++; + if (matrix_index == matrix_pool_size) { + matrix_index = 0; + } + } + double endtime = timer.getCpuTime(); + + const float timing = float(endtime - starttime); + + if (timing >= min_accurate_time) { + time_per_iter = timing / iters_at_a_time; + break; + } + + iters_at_a_time *= 2; + } + + delete[] lhs; + delete[] rhs; + delete[] dst; + + gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter; +} + +void print_cpuinfo() +{ +#ifdef __linux__ + cout << "contents of /proc/cpuinfo:" << endl; + string line; + ifstream cpuinfo("/proc/cpuinfo"); + if (cpuinfo.is_open()) { + while (getline(cpuinfo, line)) { + cout << line << endl; + } + cpuinfo.close(); + } + cout << endl; +#elif defined __APPLE__ + cout << "output of sysctl hw:" << endl; + system("sysctl hw"); + cout << endl; +#endif +} + +template +string type_name() +{ + return "unknown"; +} + +template<> +string type_name() +{ + return "float"; +} + +template<> +string type_name() +{ + return "double"; +} + +struct action_t +{ + virtual const char* invokation_name() const { abort(); return nullptr; } + virtual void run() const { abort(); } + virtual 
~action_t() {} +}; + +void show_usage_and_exit(int /*argc*/, char* argv[], + const vector>& available_actions) +{ + cerr << "usage: " << argv[0] << " [options...]" << endl << endl; + cerr << "available actions:" << endl << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << endl; + cerr << "options:" << endl << endl; + cerr << " --min-working-set-size=N:" << endl; + cerr << " Set the minimum working set size to N bytes." << endl; + cerr << " This is rounded up as needed to a multiple of matrix size." << endl; + cerr << " A larger working set lowers the chance of a warm cache." << endl; + cerr << " The default value 0 means use a large enough working" << endl; + cerr << " set to likely outsize caches." << endl; + cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl; + cerr << " avoid warm caches." << endl; + exit(1); +} + +float measure_clock_speed() +{ + cerr << "Measuring clock speed... \r" << flush; + + vector all_gflops; + for (int i = 0; i < 8; i++) { + benchmark_t b(1024, 1024, 1024); + b.run(); + all_gflops.push_back(b.gflops); + } + + sort(all_gflops.begin(), all_gflops.end()); + float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5]; + + // multiply by an arbitrary constant to discourage trying doing anything with the + // returned values besides just comparing them with each other. 
+ float result = stable_estimate * 123.456f; + + return result; +} + +struct human_duration_t +{ + int seconds; + human_duration_t(int s) : seconds(s) {} +}; + +ostream& operator<<(ostream& s, const human_duration_t& d) +{ + int remainder = d.seconds; + if (remainder > 3600) { + int hours = remainder / 3600; + s << hours << " h "; + remainder -= hours * 3600; + } + if (remainder > 60) { + int minutes = remainder / 60; + s << minutes << " min "; + remainder -= minutes * 60; + } + if (d.seconds < 600) { + s << remainder << " s"; + } + return s; +} + +const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data"; + +void serialize_benchmarks(const char* filename, const vector& benchmarks, size_t first_benchmark_to_run) +{ + FILE* file = fopen(filename, "w"); + if (!file) { + cerr << "Could not open file " << filename << " for writing." << endl; + cerr << "Do you have write permissions on the current working directory?" << endl; + exit(1); + } + size_t benchmarks_vector_size = benchmarks.size(); + fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file); + fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file); + fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file); + fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file); + fclose(file); +} + +bool deserialize_benchmarks(const char* filename, vector& benchmarks, size_t& first_benchmark_to_run) +{ + FILE* file = fopen(filename, "r"); + if (!file) { + return false; + } + if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) { + return false; + } + size_t benchmarks_vector_size = 0; + if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) { + return false; + } + if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) { + return false; + } + benchmarks.resize(benchmarks_vector_size); + if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), 
file)) { + return false; + } + unlink(filename); + return true; +} + +void try_run_some_benchmarks( + vector& benchmarks, + double time_start, + size_t& first_benchmark_to_run) +{ + if (first_benchmark_to_run == benchmarks.size()) { + return; + } + + double time_last_progress_update = 0; + double time_last_clock_speed_measurement = 0; + double time_now = 0; + + size_t benchmark_index = first_benchmark_to_run; + + while (true) { + float ratio_done = float(benchmark_index) / benchmarks.size(); + time_now = timer.getRealTime(); + + // We check clock speed every minute and at the end. + if (benchmark_index == benchmarks.size() || + time_now > time_last_clock_speed_measurement + 60.0f) + { + time_last_clock_speed_measurement = time_now; + + // Ensure that clock speed is as expected + float current_clock_speed = measure_clock_speed(); + + // The tolerance needs to be smaller than the relative difference between + // clock speeds that a device could operate under. + // It seems unlikely that a device would be throttling clock speeds by + // amounts smaller than 2%. + // With a value of 1%, I was getting within noise on a Sandy Bridge. + const float clock_speed_tolerance = 0.02f; + + if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) { + // Clock speed is now higher than we previously measured. + // Either our initial measurement was inaccurate, which won't happen + // too many times as we are keeping the best clock speed value and + // and allowing some tolerance; or something really weird happened, + // which invalidates all benchmark results collected so far. + // Either way, we better restart all over again now. + if (benchmark_index) { + cerr << "Restarting at " << 100.0f * ratio_done + << " % because clock speed increased. 
" << endl; + } + max_clock_speed = current_clock_speed; + first_benchmark_to_run = 0; + return; + } + + bool rerun_last_tests = false; + + if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { + cerr << "Measurements completed so far: " + << 100.0f * ratio_done + << " % " << endl; + cerr << "Clock speed seems to be only " + << current_clock_speed/max_clock_speed + << " times what it used to be." << endl; + + unsigned int seconds_to_sleep_if_lower_clock_speed = 1; + + while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { + if (seconds_to_sleep_if_lower_clock_speed > 32) { + cerr << "Sleeping longer probably won't make a difference." << endl; + cerr << "Serializing benchmarks to " << session_filename << endl; + serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run); + cerr << "Now restart this benchmark, and it should pick up where we left." << endl; + exit(2); + } + rerun_last_tests = true; + cerr << "Sleeping " + << seconds_to_sleep_if_lower_clock_speed + << " s... \r" << endl; + sleep(seconds_to_sleep_if_lower_clock_speed); + current_clock_speed = measure_clock_speed(); + seconds_to_sleep_if_lower_clock_speed *= 2; + } + } + + if (rerun_last_tests) { + cerr << "Redoing the last " + << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() + << " % because clock speed had been low. " << endl; + return; + } + + // nothing wrong with the clock speed so far, so there won't be a need to rerun + // benchmarks run so far in case we later encounter a lower clock speed. + first_benchmark_to_run = benchmark_index; + } + + if (benchmark_index == benchmarks.size()) { + // We're done! + first_benchmark_to_run = benchmarks.size(); + // Erase progress info + cerr << " " << endl; + return; + } + + // Display progress info on stderr + if (time_now > time_last_progress_update + 1.0f) { + time_last_progress_update = time_now; + cerr << "Measurements... 
" << 100.0f * ratio_done + << " %, ETA " + << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done) + << " \r" << flush; + } + + // This is where we actually run a benchmark! + benchmarks[benchmark_index].run(); + benchmark_index++; + } +} + +void run_benchmarks(vector& benchmarks) +{ + size_t first_benchmark_to_run; + vector deserialized_benchmarks; + bool use_deserialized_benchmarks = false; + if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) { + cerr << "Found serialized session with " + << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() + << " % already done" << endl; + if (deserialized_benchmarks.size() == benchmarks.size() && + first_benchmark_to_run > 0 && + first_benchmark_to_run < benchmarks.size()) + { + use_deserialized_benchmarks = true; + } + } + + if (use_deserialized_benchmarks) { + benchmarks = deserialized_benchmarks; + } else { + // not using deserialized benchmarks, starting from scratch + first_benchmark_to_run = 0; + + // Randomly shuffling benchmarks allows us to get accurate enough progress info, + // as now the cheap/expensive benchmarks are randomly mixed so they average out. + // It also means that if data is corrupted for some time span, the odds are that + // not all repetitions of a given benchmark will be corrupted. + random_shuffle(benchmarks.begin(), benchmarks.end()); + } + + for (int i = 0; i < 4; i++) { + max_clock_speed = max(max_clock_speed, measure_clock_speed()); + } + + double time_start = 0.0; + while (first_benchmark_to_run < benchmarks.size()) { + if (first_benchmark_to_run == 0) { + time_start = timer.getRealTime(); + } + try_run_some_benchmarks(benchmarks, + time_start, + first_benchmark_to_run); + } + + // Sort timings by increasing benchmark parameters, and decreasing gflops. + // The latter is very important. It means that we can ignore all but the first + // benchmark with given parameters. 
+ sort(benchmarks.begin(), benchmarks.end()); + + // Collect best (i.e. now first) results for each parameter values. + vector best_benchmarks; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + if (best_benchmarks.empty() || + best_benchmarks.back().compact_product_size != it->compact_product_size || + best_benchmarks.back().compact_block_size != it->compact_block_size) + { + best_benchmarks.push_back(*it); + } + } + + // keep and return only the best benchmarks + benchmarks = best_benchmarks; +} + +struct measure_all_pot_sizes_action_t : action_t +{ + virtual const char* invokation_name() const { return "all-pot-sizes"; } + virtual void run() const + { + vector benchmarks; + for (int repetition = 0; repetition < measurement_repetitions; repetition++) { + for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { + for (size_t msize = minsize; msize <= maxsize; msize *= 2) { + for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { + for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) { + for (size_t mblock = minsize; mblock <= msize; mblock *= 2) { + for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) { + benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock); + } + } + } + } + } + } + } + + run_benchmarks(benchmarks); + + cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + cout << *it << endl; + } + } +}; + +struct measure_default_sizes_action_t : action_t +{ + virtual const char* invokation_name() const { return "default-sizes"; } + virtual void run() const + { + vector benchmarks; + for (int repetition = 0; repetition < measurement_repetitions; repetition++) { + for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { + for (size_t msize = minsize; msize <= maxsize; msize *= 2) { + for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { + benchmarks.emplace_back(ksize, msize, nsize); + } + } + } + } + + 
run_benchmarks(benchmarks); + + cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + cout << *it << endl; + } + } +}; + +int main(int argc, char* argv[]) +{ + double time_start = timer.getRealTime(); + cout.precision(4); + cerr.precision(4); + + vector> available_actions; + available_actions.emplace_back(new measure_all_pot_sizes_action_t); + available_actions.emplace_back(new measure_default_sizes_action_t); + + auto action = available_actions.end(); + + if (argc <= 1) { + show_usage_and_exit(argc, argv, available_actions); + } + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[1], (*it)->invokation_name())) { + action = it; + break; + } + } + + if (action == available_actions.end()) { + show_usage_and_exit(argc, argv, available_actions); + } + + for (int i = 2; i < argc; i++) { + if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { + const char* equals_sign = strchr(argv[i], '='); + min_working_set_size = strtoul(equals_sign+1, nullptr, 10); + } else { + cerr << "unrecognized option: " << argv[i] << endl << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + + print_cpuinfo(); + + cout << "benchmark parameters:" << endl; + cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl; + cout << "scalar type: " << type_name() << endl; + cout << "packet size: " << internal::packet_traits::size << endl; + cout << "minsize = " << minsize << endl; + cout << "maxsize = " << maxsize << endl; + cout << "measurement_repetitions = " << measurement_repetitions << endl; + cout << "min_accurate_time = " << min_accurate_time << endl; + cout << "min_working_set_size = " << min_working_set_size; + if (min_working_set_size == 0) { + cout << " (try to outsize caches)"; + } + cout << endl << endl; + + (*action)->run(); + + double time_end = timer.getRealTime(); + cerr << "Finished in " << human_duration_t(time_end - time_start) << endl; 
+} diff --git a/thirdparty/eigen/bench/benchmark.cpp b/thirdparty/eigen/bench/benchmark.cpp new file mode 100644 index 000000000..c721b9081 --- /dev/null +++ b/thirdparty/eigen/bench/benchmark.cpp @@ -0,0 +1,39 @@ +// g++ -O3 -DNDEBUG -DMATSIZE= benchmark.cpp -o benchmark && time ./benchmark + +#include + +#include + +#ifndef MATSIZE +#define MATSIZE 3 +#endif + +using namespace std; +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 40000000 +#endif + +#ifndef SCALAR +#define SCALAR double +#endif + +int main(int argc, char *argv[]) +{ + Matrix I = Matrix::Ones(); + Matrix m; + for(int i = 0; i < MATSIZE; i++) + for(int j = 0; j < MATSIZE; j++) + { + m(i,j) = (i+MATSIZE*j); + } + asm("#begin"); + for(int a = 0; a < REPEAT; a++) + { + m = Matrix::Ones() + 0.00005 * (m + (m*m)); + } + asm("#end"); + cout << m << endl; + return 0; +} diff --git a/thirdparty/eigen/bench/benchmarkSlice.cpp b/thirdparty/eigen/bench/benchmarkSlice.cpp new file mode 100644 index 000000000..c5b89c545 --- /dev/null +++ b/thirdparty/eigen/bench/benchmarkSlice.cpp @@ -0,0 +1,38 @@ +// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX + +#include + +#include + +using namespace std; +using namespace Eigen; + +#ifndef REPEAT +#define REPEAT 10000 +#endif + +#ifndef SCALAR +#define SCALAR float +#endif + +int main(int argc, char *argv[]) +{ + typedef Matrix Mat; + Mat m(100, 100); + m.setRandom(); + + for(int a = 0; a < REPEAT; a++) + { + int r, c, nr, nc; + r = Eigen::internal::random(0,10); + c = Eigen::internal::random(0,10); + nr = Eigen::internal::random(50,80); + nc = Eigen::internal::random(50,80); + m.block(r,c,nr,nc) += Mat::Ones(nr,nc); + m.block(r,c,nr,nc) *= SCALAR(10); + m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10); + m.block(r,c,nr,nc) /= SCALAR(10); + } + cout << m[0] << endl; + return 0; +} diff --git a/thirdparty/eigen/bench/benchmarkX.cpp b/thirdparty/eigen/bench/benchmarkX.cpp new file mode 100644 index 000000000..8e4b60c2b --- /dev/null +++ 
b/thirdparty/eigen/bench/benchmarkX.cpp @@ -0,0 +1,36 @@ +// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b + +#include + +#include + +using namespace std; +using namespace Eigen; + +#ifndef MATTYPE +#define MATTYPE MatrixXLd +#endif + +#ifndef MATSIZE +#define MATSIZE 400 +#endif + +#ifndef REPEAT +#define REPEAT 100 +#endif + +int main(int argc, char *argv[]) +{ + MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE); + MATTYPE m(MATSIZE,MATSIZE); + for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++) + { + m(i,j) = (i+j+1)/(MATSIZE*MATSIZE); + } + for(int a = 0; a < REPEAT; a++) + { + m = I + 0.0001 * (m + m*m); + } + cout << m(0,0) << endl; + return 0; +} diff --git a/thirdparty/eigen/bench/benchmarkXcwise.cpp b/thirdparty/eigen/bench/benchmarkXcwise.cpp new file mode 100644 index 000000000..62437435e --- /dev/null +++ b/thirdparty/eigen/bench/benchmarkXcwise.cpp @@ -0,0 +1,35 @@ +// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX + +#include +#include + +using namespace std; +using namespace Eigen; + +#ifndef VECTYPE +#define VECTYPE VectorXLd +#endif + +#ifndef VECSIZE +#define VECSIZE 1000000 +#endif + +#ifndef REPEAT +#define REPEAT 1000 +#endif + +int main(int argc, char *argv[]) +{ + VECTYPE I = VECTYPE::Ones(VECSIZE); + VECTYPE m(VECSIZE,1); + for(int i = 0; i < VECSIZE; i++) + { + m[i] = 0.1 * i/VECSIZE; + } + for(int a = 0; a < REPEAT; a++) + { + m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4); + } + cout << m[0] << endl; + return 0; +} diff --git a/thirdparty/eigen/bench/benchmark_suite b/thirdparty/eigen/bench/benchmark_suite new file mode 100755 index 000000000..3f21d3661 --- /dev/null +++ b/thirdparty/eigen/bench/benchmark_suite @@ -0,0 +1,18 @@ +#!/bin/bash +CXX=${CXX-g++} # default value unless caller has defined CXX +echo "Fixed size 3x3, column-major, -DNDEBUG" +$CXX -O3 -I .. 
-DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null
+echo "Fixed size 3x3, column-major, with asserts"
+$CXX -O3 -I .. benchmark.cpp -o benchmark && time ./benchmark >/dev/null
+echo "Fixed size 3x3, row-major, -DNDEBUG"
+$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null
+echo "Fixed size 3x3, row-major, with asserts"
+$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmark.cpp -o benchmark && time ./benchmark >/dev/null
+echo "Dynamic size 400x400, column-major, -DNDEBUG"
+$CXX -O3 -I .. -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
+echo "Dynamic size 400x400, column-major, with asserts"
+$CXX -O3 -I .. benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
+echo "Dynamic size 400x400, row-major, -DNDEBUG"
+$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
+echo "Dynamic size 400x400, row-major, with asserts"
+$CXX -O3 -I .. -DEIGEN_DEFAULT_TO_ROW_MAJOR benchmarkX.cpp -o benchmarkX && time ./benchmarkX >/dev/null
diff --git a/thirdparty/eigen/bench/btl/CMakeLists.txt b/thirdparty/eigen/bench/btl/CMakeLists.txt
new file mode 100644
index 000000000..38ff9f483
--- /dev/null
+++ b/thirdparty/eigen/bench/btl/CMakeLists.txt
@@ -0,0 +1,107 @@
+PROJECT(BTL)
+
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2)
+
+set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${Eigen_SOURCE_DIR}/cmake)
+include(MacroOptionalAddSubdirectory)
+
+OPTION(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)
+
+SET(CMAKE_INCLUDE_CURRENT_DIR ON)
+
+string(REGEX MATCH icpc IS_ICPC "${CMAKE_CXX_COMPILER}") # quoted so an empty or space-containing path stays one argument
+IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
+  SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}")
+  SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}")
+  IF(BTL_NOVEC)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
+  ENDIF(BTL_NOVEC)
+ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
+
+IF(MSVC)
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Ot /GL /fp:fast -DNDEBUG") # extend the existing flags, as the GNU/ICC branch does, instead of discarding them
+# SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
+  IF(BTL_NOVEC)
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
+  ENDIF(BTL_NOVEC)
+ENDIF(MSVC)
+
+if(IS_ICPC)
+  set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}")
+  set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}")
+endif(IS_ICPC)
+
+include_directories(
+  ${PROJECT_SOURCE_DIR}/actions
+  ${PROJECT_SOURCE_DIR}/generic_bench
+  ${PROJECT_SOURCE_DIR}/generic_bench/utils
+  ${PROJECT_SOURCE_DIR}/libs/STL)
+
+# find_package(MKL)
+# if (MKL_FOUND)
+# add_definitions(-DHAVE_MKL)
+# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
+# endif (MKL_FOUND)
+
+find_library(EIGEN_BTL_RT_LIBRARY rt)
+# if we cannot find it easily, then we don't need it!
+if(NOT EIGEN_BTL_RT_LIBRARY)
+  set(EIGEN_BTL_RT_LIBRARY "")
+endif()
+
+MACRO(BTL_ADD_BENCH targetname)
+
+  foreach(_current_var ${ARGN})
+    set(_last_var ${_current_var})
+  endforeach(_current_var)
+
+  set(_sources ${ARGN})
+  list(LENGTH _sources _argn_length)
+
+  list(REMOVE_ITEM _sources ON OFF TRUE FALSE)
+
+  list(LENGTH _sources _src_length)
+
+  if (${_argn_length} EQUAL ${_src_length})
+    set(_last_var ON)
+  endif (${_argn_length} EQUAL ${_src_length})
+
+  OPTION(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var})
+
+  IF(BUILD_${targetname})
+    ADD_EXECUTABLE(${targetname} ${_sources})
+    ADD_TEST(${targetname} "${targetname}")
+    target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY})
+  ENDIF(BUILD_${targetname})
+
+ENDMACRO(BTL_ADD_BENCH)
+
+macro(btl_add_target_property target prop value)
+
+  if(BUILD_${target})
+    get_target_property(previous ${target} ${prop})
+    if(NOT previous)
+      set(previous "")
+    endif()
+    set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
+  endif()
+
+endmacro(btl_add_target_property)
+
+ENABLE_TESTING()
+
+add_subdirectory(libs/eigen3)
+add_subdirectory(libs/eigen2)
+add_subdirectory(libs/tensors)
+add_subdirectory(libs/BLAS)
+add_subdirectory(libs/ublas) +add_subdirectory(libs/gmm) +add_subdirectory(libs/mtl4) +add_subdirectory(libs/blitz) +add_subdirectory(libs/tvmet) +add_subdirectory(libs/STL) +add_subdirectory(libs/blaze) + +add_subdirectory(data) + + diff --git a/thirdparty/eigen/bench/btl/COPYING b/thirdparty/eigen/bench/btl/COPYING new file mode 100644 index 000000000..486449cc3 --- /dev/null +++ b/thirdparty/eigen/bench/btl/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/thirdparty/eigen/bench/btl/README b/thirdparty/eigen/bench/btl/README new file mode 100644 index 000000000..f3f5fb36f --- /dev/null +++ b/thirdparty/eigen/bench/btl/README @@ -0,0 +1,154 @@ +Bench Template Library + +**************************************** +Introduction : + +The aim of this project is to compare the performance +of available numerical libraries. The code is designed +to be as generic and modular as possible. Thus, adding new +numerical libraries or new numerical tests should +require minimal effort. + + +***************************************** + +Installation : + +BTL uses cmake / ctest: + +1 - create a build directory: + + $ mkdir build + $ cd build + +2 - configure: + + $ ccmake .. + +3 - run the bench using ctest: + + $ ctest -V + +You can run the benchmarks only on libraries matching a given regular expression: + ctest -V -R <regexp> +For instance: + ctest -V -R eigen2 + +You can also select a given set of actions by defining the environment variable BTL_CONFIG this way: + BTL_CONFIG="-a action1{:action2}*" ctest -V +An example: + BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2 + +Finally, if bench results already exist (the bench*.dat files) then they are merged by keeping the best result for each matrix size. If you want to overwrite the previous ones you can simply add the "--overwrite" option: + BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2 + +4 : Analyze the results. Different data files (.dat) are produced in each of the libs directories.
+ If gnuplot is available, choose a directory name in the data directory to store the results and type: + $ cd data + $ mkdir my_directory + $ cp ../libs/*/*.dat my_directory + Build the data utilities in this (data) directory + make + Then you can look at the raw data, + go_mean my_directory + or smooth the data first : + smooth_all.sh my_directory + go_mean my_directory_smooth + + +************************************************* + +Files and directories : + + generic_bench : all the bench sources common to all libraries + + actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested. + + libs/* : bench sources specific to each tested library. + + machine_dep : directory used to store machine specific Makefile.in + + data : directory used to store gnuplot scripts and data analysis utilities + +************************************************** + +Principles : the code modularity is achieved by defining two concepts : + + ****** Action concept : This is a class defining which kind + of test must be performed (e.g. a matrix_vector_product). + An Action should define the following methods : + + *** Ctor using the size of the problem (matrix or vector size) as an argument + Action action(size); + *** initialize : this method initializes the calculation (e.g. initializes the matrix and vector arguments) + action.initialize(); + *** calculate : this method actually launches the calculation to be benchmarked + action.calculate(); + *** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation) + *** name() : this method returns the name of the action (std::string) + + ****** Interface concept : This is a class or namespace defining how to use a given library and + its specific containers (matrix and vector).
Up to now an interface should define the following types + + *** real_type : kind of float to be used (float or double) + *** stl_vector : must correspond to std::vector<real_type> + *** stl_matrix : must correspond to std::vector<stl_vector> + *** gene_vector : the vector type for this interface --> e.g. (real_type *) for the C_interface + *** gene_matrix : the matrix type for this interface --> e.g. (gene_vector *) for the C_interface + + + the following common methods + + *** free_matrix(gene_matrix & A, int N) deallocation of a N sized gene_matrix A + *** free_vector(gene_vector & B) deallocation of a N sized gene_vector B + *** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A. + The allocation of A is done in this function. + *** vector_from_stl(gene_vector & B, stl_vector & B_stl) copy the content of an stl_vector B_stl into a gene_vector B. + The allocation of B is done in this function. + *** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of a gene_matrix A into an stl_matrix A_stl. + The size of A_stl must correspond to the size of A. + *** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of a gene_vector A into an stl_vector A_stl. + The size of A_stl must correspond to the size of A. + *** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source into cible. Both source + and cible must be sized NxN. + *** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source into cible. Both source + and cible must be sized N.
+ + and the following methods corresponding to the actions one wants to benchmark : + + *** matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N) + *** matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N) + *** ata_product(const gene_matrix & A, gene_matrix & X, int N) + *** aat_product(const gene_matrix & A, gene_matrix & X, int N) + *** axpy(real coef, const gene_vector & X, gene_vector & Y, int N) + + The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with + an interface. A typical main.cpp source stored in a given library directory libs/A_LIB + looks like : + + bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ; + + this function will produce an XY data file containing measured mflops as a function of the size for 50 + sizes between 10 and 1000. + + This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time + measurements must be done. For example, the X86_Perf_Analyzer uses the asm rdtsc function and provides + a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer + so + + bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ; + + is equivalent to + + bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ; + + If your system supports it we suggest using a mixed implementation (X86_perf_Analyzer+Portable_Perf_Analyzer). + replace + bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point); + with + bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point); + in generic_bench/bench.hh + +. + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_aat_product.hh b/thirdparty/eigen/bench/btl/actions/action_aat_product.hh new file mode 100644 index 000000000..aa5b35c94 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_aat_product.hh @@ -0,0 +1,145 @@ +//===================================================== +// File : action_aat_product.hh +// Author : L.
Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_AAT_PRODUCT +#define ACTION_AAT_PRODUCT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_aat_product { + +public : + + // Ctor + + Action_aat_product( int size ):_size(size) + { + MESSAGE("Action_aat_product Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_matrix(X_stl,_size); + init_matrix(resu_stl,_size); + + // generic matrix and vector initialization + + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(X_ref,X_stl); + + Interface::matrix_from_stl(A,A_stl); + Interface::matrix_from_stl(X,X_stl); + + } + + // invalidate copy ctor + + Action_aat_product( const Action_aat_product & ) + { + INFOS("illegal call to Action_aat_product Copy Ctor"); + exit(0); + } + + // Dtor + + ~Action_aat_product( void ){ + + MESSAGE("Action_aat_product Dtor"); + + // deallocation + + Interface::free_matrix(A,_size); + Interface::free_matrix(X,_size); + + Interface::free_matrix(A_ref,_size); + 
Interface::free_matrix(X_ref,_size); + + } + + // action name + + static inline std::string name( void ) + { + return "aat_"+Interface::name(); + } + + double nb_op_base( void ){ + return double(_size)*double(_size)*double(_size); + } + + inline void initialize( void ){ + + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_matrix(X_ref,X,_size); + + } + + inline void calculate( void ) { + + Interface::aat_product(A,X,_size); + + } + + void check_result( void ){ + if (_size>128) return; + // calculation check + + Interface::matrix_to_stl(X,resu_stl); + + STL_interface::aat_product(A_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(1); + } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_matrix X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_matrix X; + + + int _size; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_ata_product.hh b/thirdparty/eigen/bench/btl/actions/action_ata_product.hh new file mode 100644 index 000000000..04364fe67 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_ata_product.hh @@ -0,0 +1,145 @@ +//===================================================== +// File : action_ata_product.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_ATA_PRODUCT +#define ACTION_ATA_PRODUCT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_ata_product { + +public : + + // Ctor + + Action_ata_product( int size ):_size(size) + { + MESSAGE("Action_ata_product Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_matrix(X_stl,_size); + init_matrix(resu_stl,_size); + + // generic matrix and vector initialization + + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(X_ref,X_stl); + + Interface::matrix_from_stl(A,A_stl); + Interface::matrix_from_stl(X,X_stl); + + } + + // invalidate copy ctor + + Action_ata_product( const Action_ata_product & ) + { + INFOS("illegal call to Action_ata_product Copy Ctor"); + exit(0); + } + + // Dtor + + ~Action_ata_product( void ){ + + MESSAGE("Action_ata_product Dtor"); + + // deallocation + + Interface::free_matrix(A,_size); + Interface::free_matrix(X,_size); + + Interface::free_matrix(A_ref,_size); + Interface::free_matrix(X_ref,_size); + + } + + // action name + + static inline std::string name( void ) + { + return "ata_"+Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size*_size*_size; + } + + inline void initialize( void ){ + + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_matrix(X_ref,X,_size); + + } + + inline void calculate( void ) { + + 
Interface::ata_product(A,X,_size); + + } + + void check_result( void ){ + if (_size>128) return; + // calculation check + + Interface::matrix_to_stl(X,resu_stl); + + STL_interface::ata_product(A_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(1); + } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_matrix X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_matrix X; + + + int _size; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_atv_product.hh b/thirdparty/eigen/bench/btl/actions/action_atv_product.hh new file mode 100644 index 000000000..a8234514b --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_atv_product.hh @@ -0,0 +1,134 @@ +//===================================================== +// File : action_atv_product.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_ATV_PRODUCT +#define ACTION_ATV_PRODUCT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_atv_product { + +public : + + Action_atv_product( int size ) : _size(size) + { + MESSAGE("Action_atv_product Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_vector(B_stl,_size); + init_vector(X_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + + Interface::matrix_from_stl(A_ref,A_stl); + Interface::vector_from_stl(B_ref,B_stl); + Interface::vector_from_stl(X_ref,X_stl); + + Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X,X_stl); + } + + // invalidate copy ctor + Action_atv_product( const Action_atv_product & ) + { + INFOS("illegal call to Action_atv_product Copy Ctor"); + exit(1); + } + + ~Action_atv_product( void ) + { + MESSAGE("Action_atv_product Dtor"); + + Interface::free_matrix(A,_size); + Interface::free_vector(B); + Interface::free_vector(X); + + Interface::free_matrix(A_ref,_size); + Interface::free_vector(B_ref); + Interface::free_vector(X_ref); + } + + static inline std::string name() { return "atv_" + Interface::name(); } + + double nb_op_base( void ) { return 2.0*_size*_size; } + + inline void initialize( void ){ + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_vector(B_ref,B,_size); + Interface::copy_vector(X_ref,X,_size); + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("begin atv"); + Interface::atv_product(A,B,X,_size); + BTL_ASM_COMMENT("end atv"); + } + + void check_result( void ) + { + if (_size>128) return; + Interface::vector_to_stl(X,resu_stl); + + STL_interface::atv_product(A_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-6){ + 
INFOS("WRONG CALCULATION...residual=" << error); + exit(1); + } + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector X_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_vector B_ref; + typename Interface::gene_vector X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_vector B; + typename Interface::gene_vector X; + + + int _size; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_axpby.hh b/thirdparty/eigen/bench/btl/actions/action_axpby.hh new file mode 100644 index 000000000..dadd0ccf3 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_axpby.hh @@ -0,0 +1,127 @@ +//===================================================== +// File : action_axpby.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_AXPBY +#define ACTION_AXPBY +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_axpby { + +public : + + // Ctor + Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size) + { + MESSAGE("Action_axpby Ctor"); + + // STL vector initialization + init_vector(X_stl,_size); + init_vector(Y_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(Y_ref,Y_stl); + + Interface::vector_from_stl(X,X_stl); + Interface::vector_from_stl(Y,Y_stl); + } + + // invalidate copy ctor + Action_axpby( const Action_axpby & ) + { + INFOS("illegal call to Action_axpby Copy Ctor"); + exit(1); + } + + // Dtor + ~Action_axpby( void ){ + MESSAGE("Action_axpby Dtor"); + + // deallocation + Interface::free_vector(X_ref); + Interface::free_vector(Y_ref); + + Interface::free_vector(X); + Interface::free_vector(Y); + } + + // action name + static inline std::string name( void ) + { + return "axpby_"+Interface::name(); + } + + double nb_op_base( void ){ + return 3.0*_size; + } + + inline void initialize( void ){ + Interface::copy_vector(X_ref,X,_size); + Interface::copy_vector(Y_ref,Y,_size); + } + + inline void calculate( void ) { + BTL_ASM_COMMENT("mybegin axpby"); + Interface::axpby(_alpha,X,_beta,Y,_size); + BTL_ASM_COMMENT("myend axpby"); + } + + void check_result( void ){ + if (_size>128) return; + // calculation check + Interface::vector_to_stl(Y,resu_stl); + + STL_interface::axpby(_alpha,X_stl,_beta,Y_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(Y_stl,resu_stl); + + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(2); + } + } + +private : + + typename Interface::stl_vector X_stl; + typename Interface::stl_vector Y_stl; + typename 
Interface::stl_vector resu_stl; + + typename Interface::gene_vector X_ref; + typename Interface::gene_vector Y_ref; + + typename Interface::gene_vector X; + typename Interface::gene_vector Y; + + typename Interface::real_type _alpha; + typename Interface::real_type _beta; + + int _size; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_axpy.hh b/thirdparty/eigen/bench/btl/actions/action_axpy.hh new file mode 100644 index 000000000..261be4cb8 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_axpy.hh @@ -0,0 +1,139 @@ +//===================================================== +// File : action_axpy.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_AXPY +#define ACTION_AXPY +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_axpy { + +public : + + // Ctor + + Action_axpy( int size ):_coef(1.0),_size(size) + { + MESSAGE("Action_axpy Ctor"); + + // STL vector initialization + + init_vector(X_stl,_size); + init_vector(Y_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(Y_ref,Y_stl); + + Interface::vector_from_stl(X,X_stl); + Interface::vector_from_stl(Y,Y_stl); + + + } + + // invalidate copy ctor + + Action_axpy( const Action_axpy & ) + { + INFOS("illegal call to Action_axpy Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_axpy( void ){ + + MESSAGE("Action_axpy Dtor"); + + // deallocation + + Interface::free_vector(X_ref); + Interface::free_vector(Y_ref); + + Interface::free_vector(X); + Interface::free_vector(Y); + } + + // action name + + static inline std::string name( void ) + { + return "axpy_"+Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size; + } + + inline void initialize( void ){ + Interface::copy_vector(X_ref,X,_size); + Interface::copy_vector(Y_ref,Y,_size); + } + + inline void calculate( void ) { + BTL_ASM_COMMENT("mybegin axpy"); + Interface::axpy(_coef,X,Y,_size); + BTL_ASM_COMMENT("myend axpy"); + } + + void check_result( void ){ + if (_size>128) return; + // calculation check + + Interface::vector_to_stl(Y,resu_stl); + + STL_interface::axpy(_coef,X_stl,Y_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(Y_stl,resu_stl); + + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(0); + } + + } + +private : + + typename Interface::stl_vector X_stl; + typename Interface::stl_vector Y_stl; + typename Interface::stl_vector resu_stl; + + 
typename Interface::gene_vector X_ref; + typename Interface::gene_vector Y_ref; + + typename Interface::gene_vector X; + typename Interface::gene_vector Y; + + typename Interface::real_type _coef; + + int _size; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_cholesky.hh b/thirdparty/eigen/bench/btl/actions/action_cholesky.hh new file mode 100644 index 000000000..5f66d113a --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_cholesky.hh @@ -0,0 +1,128 @@ +//===================================================== +// File : action_cholesky.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_CHOLESKY +#define ACTION_CHOLESKY +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_cholesky { + +public : + + // Ctor + + Action_cholesky( int size ):_size(size) + { + MESSAGE("Action_cholesky Ctor"); + + // STL mat/vec initialization + init_matrix_symm(X_stl,_size); + init_matrix(C_stl,_size); + + // make sure X is invertible + for (int i=0; i<_size; ++i) + X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100; + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 0; + for (int j=0; j<_size; ++j) + { + double r = std::max(_size - j -1,0); + _cost += 2*(r*j+r+j); + } + } + + // invalidate copy ctor + + Action_cholesky( const Action_cholesky & ) + { + INFOS("illegal call to Action_cholesky Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_cholesky( void ){ + + MESSAGE("Action_cholesky Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) + { + return "cholesky_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::cholesky(X,C,_size); + } + + void check_result( void ){ + // calculation check +// STL_interface::cholesky(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix C_stl; + + typename Interface::gene_matrix X_ref; + 
typename Interface::gene_matrix X; + typename Interface::gene_matrix C; + + int _size; + double _cost; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_ger.hh b/thirdparty/eigen/bench/btl/actions/action_ger.hh new file mode 100644 index 000000000..dc766efc5 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_ger.hh @@ -0,0 +1,128 @@ + +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_GER +#define ACTION_GER +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_ger { + +public : + + // Ctor + BTL_DONT_INLINE Action_ger( int size ):_size(size) + { + MESSAGE("Action_ger Ctor"); + + // STL matrix and vector initialization + typename Interface::stl_matrix tmp; + init_matrix(A_stl,_size); + init_vector(B_stl,_size); + init_vector(X_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B_ref,B_stl); + Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(X,X_stl); + } + + // invalidate copy ctor + Action_ger( const Action_ger & ) + { + INFOS("illegal call to Action_ger Copy Ctor"); + exit(1); + } + + // Dtor + BTL_DONT_INLINE ~Action_ger( void ){ + MESSAGE("Action_ger Dtor"); + Interface::free_matrix(A,_size); + Interface::free_vector(B); + Interface::free_vector(X); + Interface::free_matrix(A_ref,_size); + Interface::free_vector(B_ref); + Interface::free_vector(X_ref); + + } + + // action name + static inline std::string name( void ) + { + return "ger_" + Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size*_size; + } + + BTL_DONT_INLINE void initialize( void ){ + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_vector(B_ref,B,_size); + Interface::copy_vector(X_ref,X,_size); + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("#begin ger"); + Interface::ger(A,B,X,_size); + BTL_ASM_COMMENT("end ger"); + } + + BTL_DONT_INLINE void check_result( void ){ + // calculation check + Interface::vector_to_stl(X,resu_stl); + + STL_interface::ger(A_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + 
STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-3){ + INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); + } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector X_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_vector B_ref; + typename Interface::gene_vector X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_vector B; + typename Interface::gene_vector X; + + int _size; +}; + + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_hessenberg.hh b/thirdparty/eigen/bench/btl/actions/action_hessenberg.hh new file mode 100644 index 000000000..2100ebd89 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_hessenberg.hh @@ -0,0 +1,233 @@ +//===================================================== +// File : action_hessenberg.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_HESSENBERG +#define ACTION_HESSENBERG +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_hessenberg { + +public : + + // Ctor + + Action_hessenberg( int size ):_size(size) + { + MESSAGE("Action_hessenberg Ctor"); + + // STL vector initialization + init_matrix(X_stl,_size); + + init_matrix(C_stl,_size); + init_matrix(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 0; + for (int j=0; j<_size-2; ++j) + { + double r = std::max(0,_size-j-1); + double b = std::max(0,_size-j-2); + _cost += 6 + 3*b + r*r*4 + r*_size*4; + } + } + + // invalidate copy ctor + + Action_hessenberg( const Action_hessenberg & ) + { + INFOS("illegal call to Action_hessenberg Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_hessenberg( void ){ + + MESSAGE("Action_hessenberg Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) + { + return "hessenberg_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::hessenberg(X,C,_size); + } + + void check_result( void ){ + // calculation check + Interface::matrix_to_stl(C,resu_stl); + +// STL_interface::hessenberg(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix C_stl; + typename 
Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix X_ref; + typename Interface::gene_matrix X; + typename Interface::gene_matrix C; + + int _size; + double _cost; +}; + +template +class Action_tridiagonalization { + +public : + + // Ctor + + Action_tridiagonalization( int size ):_size(size) + { + MESSAGE("Action_tridiagonalization Ctor"); + + // STL vector initialization + init_matrix(X_stl,_size); + + for(int i=0; i<_size; ++i) + { + for(int j=0; j(C_stl,_size); + init_matrix(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 0; + for (int j=0; j<_size-2; ++j) + { + double r = std::max(0,_size-j-1); + double b = std::max(0,_size-j-2); + _cost += 6. + 3.*b + r*r*8.; + } + } + + // invalidate copy ctor + + Action_tridiagonalization( const Action_tridiagonalization & ) + { + INFOS("illegal call to Action_tridiagonalization Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_tridiagonalization( void ){ + + MESSAGE("Action_tridiagonalization Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) { return "tridiagonalization_"+Interface::name(); } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::tridiagonalization(X,C,_size); + } + + void check_result( void ){ + // calculation check + Interface::matrix_to_stl(C,resu_stl); + +// STL_interface::tridiagonalization(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + 
typename Interface::stl_matrix C_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix X_ref; + typename Interface::gene_matrix X; + typename Interface::gene_matrix C; + + int _size; + double _cost; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_lu_decomp.hh b/thirdparty/eigen/bench/btl/actions/action_lu_decomp.hh new file mode 100644 index 000000000..2448e82c4 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_lu_decomp.hh @@ -0,0 +1,124 @@ +//===================================================== +// File : action_lu_decomp.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_LU_DECOMP +#define ACTION_LU_DECOMP +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_lu_decomp { + +public : + + // Ctor + + Action_lu_decomp( int size ):_size(size) + { + MESSAGE("Action_lu_decomp Ctor"); + + // STL vector initialization + init_matrix(X_stl,_size); + + init_matrix(C_stl,_size); + init_matrix(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 2.0*size*size*size/3.0 + size*size; + } + + // invalidate copy ctor + + Action_lu_decomp( const Action_lu_decomp & ) + { + INFOS("illegal call to Action_lu_decomp Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_lu_decomp( void ){ + + MESSAGE("Action_lu_decomp Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) + { + return "complete_lu_decomp_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::lu_decomp(X,C,_size); + } + + void check_result( void ){ + // calculation check + Interface::matrix_to_stl(C,resu_stl); + +// STL_interface::lu_decomp(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix C_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix X_ref; + typename Interface::gene_matrix X; + typename 
Interface::gene_matrix C; + + int _size; + double _cost; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_lu_solve.hh b/thirdparty/eigen/bench/btl/actions/action_lu_solve.hh new file mode 100644 index 000000000..5a81e6341 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_lu_solve.hh @@ -0,0 +1,136 @@ +//===================================================== +// File : action_lu_solve.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_LU_SOLVE +#define ACTION_LU_SOLVE +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_lu_solve +{ + +public : + + static inline std::string name( void ) + { + return "lu_solve_"+Interface::name(); + } + + static double nb_op_base(int size){ + return 2.0*size*size*size/3.0; // questionable but not really important + } + + + static double calculate( int nb_calc, int size ) { + + // STL matrix and vector initialization + + typename Interface::stl_matrix A_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector X_stl; + + init_matrix(A_stl,size); + init_vector(B_stl,size); + init_vector(X_stl,size); + + // generic matrix and vector initialization + + typename Interface::gene_matrix A; + typename Interface::gene_vector B; + typename Interface::gene_vector X; + + typename Interface::gene_matrix LU; + + Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X,X_stl); + Interface::matrix_from_stl(LU,A_stl); + + // local variable : + + typename Interface::Pivot_Vector pivot; // pivot vector + Interface::new_Pivot_Vector(pivot,size); + + // timer utilities + + Portable_Timer chronos; + + // time measurement + + chronos.start(); + + for (int ii=0;ii::matrix_vector_product(A_stl,X_stl,B_new_stl,size); + + typename Interface::real_type error= + STL_interface::norm_diff(B_stl,B_new_stl); + + if (error>1.e-5){ + INFOS("WRONG CALCULATION...residual=" << error); + STL_interface::display_vector(B_stl); + STL_interface::display_vector(B_new_stl); + exit(0); + } + + // deallocation and return time + + Interface::free_matrix(A,size); + Interface::free_vector(B); + Interface::free_vector(X); + Interface::free_Pivot_Vector(pivot); + + return time; + } + +}; + + +#endif + + + diff --git 
a/thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product.hh b/thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product.hh new file mode 100644 index 000000000..f65ee0529 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product.hh @@ -0,0 +1,150 @@ +//===================================================== +// File : action_matrix_matrix_product.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_MATRIX_MATRIX_PRODUCT +#define ACTION_MATRIX_MATRIX_PRODUCT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_matrix_matrix_product { + +public : + + // Ctor + + Action_matrix_matrix_product( int size ):_size(size) + { + MESSAGE("Action_matrix_matrix_product Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_matrix(B_stl,_size); + init_matrix(X_stl,_size); + init_matrix(resu_stl,_size); + + // generic matrix and vector initialization + + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(B_ref,B_stl); + Interface::matrix_from_stl(X_ref,X_stl); + + Interface::matrix_from_stl(A,A_stl); + Interface::matrix_from_stl(B,B_stl); + Interface::matrix_from_stl(X,X_stl); + + } + + // invalidate copy ctor + + Action_matrix_matrix_product( const Action_matrix_matrix_product & ) + { + INFOS("illegal call to Action_matrix_matrix_product Copy Ctor"); + exit(0); + } + + // Dtor + + ~Action_matrix_matrix_product( void ){ + + MESSAGE("Action_matrix_matrix_product Dtor"); + + // deallocation + + Interface::free_matrix(A,_size); + Interface::free_matrix(B,_size); + Interface::free_matrix(X,_size); + + Interface::free_matrix(A_ref,_size); + Interface::free_matrix(B_ref,_size); + Interface::free_matrix(X_ref,_size); + + } + + // action name + + static inline std::string name( void ) + { + return "matrix_matrix_"+Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size*_size*_size; + } + + inline void initialize( void ){ + + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_matrix(B_ref,B,_size); + Interface::copy_matrix(X_ref,X,_size); + + } + + inline void calculate( void ) { + Interface::matrix_matrix_product(A,B,X,_size); + } + + void check_result( void ){ + + // calculation check + if (_size<200) + { + 
Interface::matrix_to_stl(X,resu_stl); + STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,_size); + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(1); + } + } + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_matrix B_stl; + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_matrix B_ref; + typename Interface::gene_matrix X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_matrix B; + typename Interface::gene_matrix X; + + + int _size; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product_bis.hh b/thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product_bis.hh new file mode 100644 index 000000000..29c10a6e2 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_matrix_matrix_product_bis.hh @@ -0,0 +1,152 @@ +//===================================================== +// File : action_matrix_matrix_product_bis.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_MATRIX_MATRIX_PRODUCT_BIS +#define ACTION_MATRIX_MATRIX_PRODUCT_BIS +#include "utilities.h" +#include "STL_interface.hh" +#include "STL_timer.hh" +#include +#include "init_function.hh" +#include "init_vector.hh" +#include "init_matrix.hh" + +using namespace std; + +template +class Action_matrix_matrix_product_bis { + +public : + + static inline std::string name( void ) + { + return "matrix_matrix_"+Interface::name(); + } + + static double nb_op_base(int size){ + return 2.0*size*size*size; + } + + static double calculate( int nb_calc, int size ) { + + // STL matrix and vector initialization + + typename Interface::stl_matrix A_stl; + typename Interface::stl_matrix B_stl; + typename Interface::stl_matrix X_stl; + + init_matrix(A_stl,size); + init_matrix(B_stl,size); + init_matrix(X_stl,size); + + // generic matrix and vector initialization + + typename Interface::gene_matrix A_ref; + typename Interface::gene_matrix B_ref; + typename Interface::gene_matrix X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_matrix B; + typename Interface::gene_matrix X; + + + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(B_ref,B_stl); + Interface::matrix_from_stl(X_ref,X_stl); + + Interface::matrix_from_stl(A,A_stl); + Interface::matrix_from_stl(B,B_stl); + Interface::matrix_from_stl(X,X_stl); + + + // STL_timer utilities + + STL_timer chronos; + + // Baseline evaluation + + chronos.start_baseline(nb_calc); + + do { + + Interface::copy_matrix(A_ref,A,size); + Interface::copy_matrix(B_ref,B,size); + Interface::copy_matrix(X_ref,X,size); + + + // Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!! 
+ } + while(chronos.check()); + + chronos.report(true); + + // Time measurement + + chronos.start(nb_calc); + + do { + + Interface::copy_matrix(A_ref,A,size); + Interface::copy_matrix(B_ref,B,size); + Interface::copy_matrix(X_ref,X,size); + + Interface::matrix_matrix_product(A,B,X,size); // here it is not commented !!!! + } + while(chronos.check()); + + chronos.report(true); + + double time=chronos.calculated_time/2000.0; + + // calculation check + + typename Interface::stl_matrix resu_stl(size); + + Interface::matrix_to_stl(X,resu_stl); + + STL_interface::matrix_matrix_product(A_stl,B_stl,X_stl,size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-6){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(1); + } + + // deallocation and return time + + Interface::free_matrix(A,size); + Interface::free_matrix(B,size); + Interface::free_matrix(X,size); + + Interface::free_matrix(A_ref,size); + Interface::free_matrix(B_ref,size); + Interface::free_matrix(X_ref,size); + + return time; + } + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_matrix_vector_product.hh b/thirdparty/eigen/bench/btl/actions/action_matrix_vector_product.hh new file mode 100644 index 000000000..8bab79d18 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_matrix_vector_product.hh @@ -0,0 +1,153 @@ +//===================================================== +// File : action_matrix_vector_product.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_MATRIX_VECTOR_PRODUCT +#define ACTION_MATRIX_VECTOR_PRODUCT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_matrix_vector_product { + +public : + + // Ctor + + BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size) + { + MESSAGE("Action_matrix_vector_product Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_vector(B_stl,_size); + init_vector(X_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B_ref,B_stl); + Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(X,X_stl); + + } + + // invalidate copy ctor + + Action_matrix_vector_product( const Action_matrix_vector_product & ) + { + INFOS("illegal call to Action_matrix_vector_product Copy Ctor"); + exit(1); + } + + // Dtor + + BTL_DONT_INLINE ~Action_matrix_vector_product( void ){ + + MESSAGE("Action_matrix_vector_product Dtor"); + + // deallocation + + Interface::free_matrix(A,_size); + Interface::free_vector(B); + Interface::free_vector(X); + + Interface::free_matrix(A_ref,_size); + Interface::free_vector(B_ref); + Interface::free_vector(X_ref); + + } + + // action name + + static inline std::string name( void ) + { + 
return "matrix_vector_" + Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size*_size; + } + + BTL_DONT_INLINE void initialize( void ){ + + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_vector(B_ref,B,_size); + Interface::copy_vector(X_ref,X,_size); + + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("#begin matrix_vector_product"); + Interface::matrix_vector_product(A,B,X,_size); + BTL_ASM_COMMENT("end matrix_vector_product"); + } + + BTL_DONT_INLINE void check_result( void ){ + + // calculation check + + Interface::vector_to_stl(X,resu_stl); + + STL_interface::matrix_vector_product(A_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-5){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(0); + } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector X_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_vector B_ref; + typename Interface::gene_vector X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_vector B; + typename Interface::gene_vector X; + + + int _size; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_partial_lu.hh b/thirdparty/eigen/bench/btl/actions/action_partial_lu.hh new file mode 100644 index 000000000..770ea1d1e --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_partial_lu.hh @@ -0,0 +1,125 @@ +//===================================================== +// File : action_lu_decomp.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any 
later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_PARTIAL_LU +#define ACTION_PARTIAL_LU +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_partial_lu { + +public : + + // Ctor + + Action_partial_lu( int size ):_size(size) + { + MESSAGE("Action_partial_lu Ctor"); + + // STL vector initialization + init_matrix(X_stl,_size); + init_matrix(C_stl,_size); + + // make sure X is invertible + for (int i=0; i<_size; ++i) + X_stl[i][i] = X_stl[i][i] * 1e2 + 1; + + // generic matrix and vector initialization + Interface::matrix_from_stl(X_ref,X_stl); + Interface::matrix_from_stl(X,X_stl); + Interface::matrix_from_stl(C,C_stl); + + _cost = 2.0*size*size*size/3.0 + size*size; + } + + // invalidate copy ctor + + Action_partial_lu( const Action_partial_lu & ) + { + INFOS("illegal call to Action_partial_lu Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_partial_lu( void ){ + + MESSAGE("Action_partial_lu Dtor"); + + // deallocation + Interface::free_matrix(X_ref,_size); + Interface::free_matrix(X,_size); + Interface::free_matrix(C,_size); + } + + // action name + + static inline std::string name( void ) + { + return "partial_lu_decomp_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + Interface::copy_matrix(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::partial_lu_decomp(X,C,_size); + } + + void 
check_result( void ){ + // calculation check +// Interface::matrix_to_stl(C,resu_stl); + +// STL_interface::lu_decomp(X_stl,C_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(C_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix C_stl; + + typename Interface::gene_matrix X_ref; + typename Interface::gene_matrix X; + typename Interface::gene_matrix C; + + int _size; + double _cost; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_rot.hh b/thirdparty/eigen/bench/btl/actions/action_rot.hh new file mode 100644 index 000000000..df822a6d6 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_rot.hh @@ -0,0 +1,116 @@ + +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_ROT +#define ACTION_ROT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_rot { + +public : + + // Ctor + BTL_DONT_INLINE Action_rot( int size ):_size(size) + { + MESSAGE("Action_rot Ctor"); + + // STL matrix and vector initialization + typename Interface::stl_matrix tmp; + init_vector(A_stl,_size); + init_vector(B_stl,_size); + + // generic matrix and vector initialization + Interface::vector_from_stl(A_ref,A_stl); + Interface::vector_from_stl(A,A_stl); + Interface::vector_from_stl(B_ref,B_stl); + Interface::vector_from_stl(B,B_stl); + } + + // invalidate copy ctor + Action_rot( const Action_rot & ) + { + INFOS("illegal call to Action_rot Copy Ctor"); + exit(1); + } + + // Dtor + BTL_DONT_INLINE ~Action_rot( void ){ + MESSAGE("Action_rot Dtor"); + Interface::free_vector(A); + Interface::free_vector(B); + Interface::free_vector(A_ref); + Interface::free_vector(B_ref); + } + + // action name + static inline std::string name( void ) + { + return "rot_" + Interface::name(); + } + + double nb_op_base( void ){ + return 6.0*_size; + } + + BTL_DONT_INLINE void initialize( void ){ + Interface::copy_vector(A_ref,A,_size); + Interface::copy_vector(B_ref,B,_size); + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("#begin rot"); + Interface::rot(A,B,0.5,0.6,_size); + BTL_ASM_COMMENT("end rot"); + } + + BTL_DONT_INLINE void check_result( void ){ + // calculation check +// Interface::vector_to_stl(X,resu_stl); + +// STL_interface::rot(A_stl,B_stl,X_stl,_size); + +// typename Interface::real_type error= +// STL_interface::norm_diff(X_stl,resu_stl); + +// if (error>1.e-3){ +// INFOS("WRONG CALCULATION...residual=" << error); +// exit(0); +// } + + } + +private : + + typename Interface::stl_vector A_stl; + typename Interface::stl_vector B_stl; + + typename Interface::gene_vector A_ref; + 
typename Interface::gene_vector B_ref; + + typename Interface::gene_vector A; + typename Interface::gene_vector B; + + int _size; +}; + + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_symv.hh b/thirdparty/eigen/bench/btl/actions/action_symv.hh new file mode 100644 index 000000000..a32b9dfa0 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_symv.hh @@ -0,0 +1,139 @@ +//===================================================== +// File : action_symv.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_SYMV +#define ACTION_SYMV +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_symv { + +public : + + // Ctor + + BTL_DONT_INLINE Action_symv( int size ):_size(size) + { + MESSAGE("Action_symv Ctor"); + + // STL matrix and vector initialization + init_matrix_symm(A_stl,_size); + init_vector(B_stl,_size); + init_vector(X_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B_ref,B_stl); + Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(X,X_stl); + + } + + // invalidate copy ctor + + Action_symv( const Action_symv & ) + { + INFOS("illegal call to Action_symv Copy Ctor"); + exit(1); + } + + // Dtor + BTL_DONT_INLINE ~Action_symv( void ){ + Interface::free_matrix(A,_size); + Interface::free_vector(B); + Interface::free_vector(X); + Interface::free_matrix(A_ref,_size); + Interface::free_vector(B_ref); + Interface::free_vector(X_ref); + } + + // action name + + static inline std::string name( void ) + { + return "symv_" + Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size*_size; + } + + BTL_DONT_INLINE void initialize( void ){ + + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_vector(B_ref,B,_size); + Interface::copy_vector(X_ref,X,_size); + + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("#begin symv"); + Interface::symv(A,B,X,_size); + BTL_ASM_COMMENT("end symv"); + } + + BTL_DONT_INLINE void check_result( void ){ + if (_size>128) return; + // calculation check + Interface::vector_to_stl(X,resu_stl); + + STL_interface::symv(A_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + 
if (error>1.e-5){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(0); + } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector X_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_vector B_ref; + typename Interface::gene_vector X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_vector B; + typename Interface::gene_vector X; + + + int _size; + +}; + + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_syr2.hh b/thirdparty/eigen/bench/btl/actions/action_syr2.hh new file mode 100644 index 000000000..7c6712b13 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_syr2.hh @@ -0,0 +1,133 @@ +//===================================================== +// File : action_syr2.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_SYR2 +#define ACTION_SYR2 +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_syr2 { + +public : + + // Ctor + + BTL_DONT_INLINE Action_syr2( int size ):_size(size) + { + // STL matrix and vector initialization + typename Interface::stl_matrix tmp; + init_matrix(A_stl,_size); + init_vector(B_stl,_size); + init_vector(X_stl,_size); + init_vector(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(A_ref,A_stl); + Interface::matrix_from_stl(A,A_stl); + Interface::vector_from_stl(B_ref,B_stl); + Interface::vector_from_stl(B,B_stl); + Interface::vector_from_stl(X_ref,X_stl); + Interface::vector_from_stl(X,X_stl); + } + + // invalidate copy ctor + Action_syr2( const Action_syr2 & ) + { + INFOS("illegal call to Action_syr2 Copy Ctor"); + exit(1); + } + + // Dtor + BTL_DONT_INLINE ~Action_syr2( void ){ + Interface::free_matrix(A,_size); + Interface::free_vector(B); + Interface::free_vector(X); + Interface::free_matrix(A_ref,_size); + Interface::free_vector(B_ref); + Interface::free_vector(X_ref); + } + + // action name + + static inline std::string name( void ) + { + return "syr2_" + Interface::name(); + } + + double nb_op_base( void ){ + return 2.0*_size*_size; + } + + BTL_DONT_INLINE void initialize( void ){ + Interface::copy_matrix(A_ref,A,_size); + Interface::copy_vector(B_ref,B,_size); + Interface::copy_vector(X_ref,X,_size); + } + + BTL_DONT_INLINE void calculate( void ) { + BTL_ASM_COMMENT("#begin syr2"); + Interface::syr2(A,B,X,_size); + BTL_ASM_COMMENT("end syr2"); + } + + BTL_DONT_INLINE void check_result( void ){ + // calculation check + Interface::vector_to_stl(X,resu_stl); + + STL_interface::syr2(A_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-3){ + INFOS("WRONG 
CALCULATION...residual=" << error); +// exit(0); + } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_vector B_stl; + typename Interface::stl_vector X_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_vector B_ref; + typename Interface::gene_vector X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_vector B; + typename Interface::gene_vector X; + + + int _size; + +}; + + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_trisolve.hh b/thirdparty/eigen/bench/btl/actions/action_trisolve.hh new file mode 100644 index 000000000..d6f0b477e --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_trisolve.hh @@ -0,0 +1,137 @@ +//===================================================== +// File : action_trisolve.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_TRISOLVE +#define ACTION_TRISOLVE +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_trisolve { + +public : + + // Ctor + + Action_trisolve( int size ):_size(size) + { + MESSAGE("Action_trisolve Ctor"); + + // STL vector initialization + init_matrix(L_stl,_size); + init_vector(B_stl,_size); + init_vector(X_stl,_size); + for (int j=0; j<_size; ++j) + { + for (int i=0; i(resu_stl,_size); + + // generic matrix and vector initialization + Interface::matrix_from_stl(L,L_stl); + Interface::vector_from_stl(X,X_stl); + Interface::vector_from_stl(B,B_stl); + + _cost = 0; + for (int j=0; j<_size; ++j) + { + _cost += 2*j + 1; + } + } + + // invalidate copy ctor + + Action_trisolve( const Action_trisolve & ) + { + INFOS("illegal call to Action_trisolve Copy Ctor"); + exit(1); + } + + // Dtor + + ~Action_trisolve( void ){ + + MESSAGE("Action_trisolve Dtor"); + + // deallocation + Interface::free_matrix(L,_size); + Interface::free_vector(B); + Interface::free_vector(X); + } + + // action name + + static inline std::string name( void ) + { + return "trisolve_vector_"+Interface::name(); + } + + double nb_op_base( void ){ + return _cost; + } + + inline void initialize( void ){ + //Interface::copy_vector(X_ref,X,_size); + } + + inline void calculate( void ) { + Interface::trisolve_lower(L,B,X,_size); + } + + void check_result(){ + if (_size>128) return; + // calculation check + Interface::vector_to_stl(X,resu_stl); + + STL_interface::trisolve_lower(L_stl,B_stl,X_stl,_size); + + typename Interface::real_type error= + STL_interface::norm_diff(X_stl,resu_stl); + + if (error>1.e-4){ + INFOS("WRONG CALCULATION...residual=" << error); + exit(2); + } //else INFOS("CALCULATION OK...residual=" << error); + + } + +private : + + typename Interface::stl_matrix L_stl; + typename Interface::stl_vector X_stl; + typename 
Interface::stl_vector B_stl; + typename Interface::stl_vector resu_stl; + + typename Interface::gene_matrix L; + typename Interface::gene_vector X; + typename Interface::gene_vector B; + + int _size; + double _cost; +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/actions/action_trisolve_matrix.hh b/thirdparty/eigen/bench/btl/actions/action_trisolve_matrix.hh new file mode 100644 index 000000000..0fc2bb9ef --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_trisolve_matrix.hh @@ -0,0 +1,165 @@ +//===================================================== +// File : action_matrix_matrix_product.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef ACTION_TRISOLVE_MATRIX_PRODUCT +#define ACTION_TRISOLVE_MATRIX_PRODUCT +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_trisolve_matrix { + +public : + + // Ctor + + Action_trisolve_matrix( int size ):_size(size) + { + MESSAGE("Action_trisolve_matrix Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_matrix(B_stl,_size); + init_matrix(X_stl,_size); + init_matrix(resu_stl,_size); + + for (int j=0; j<_size; ++j) + { + for (int i=0; i::matrix_matrix_product(A_stl,B_stl,X_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(X_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// // exit(1); +// } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_matrix B_stl; + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_matrix B_ref; + typename Interface::gene_matrix X_ref; + + typename Interface::gene_matrix A; + typename Interface::gene_matrix B; + typename Interface::gene_matrix X; + + int _size; + double _cost; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/action_trmm.hh b/thirdparty/eigen/bench/btl/actions/action_trmm.hh new file mode 100644 index 000000000..8f7813818 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/action_trmm.hh @@ -0,0 +1,165 @@ +//===================================================== +// File : action_matrix_matrix_product.hh +// Author : L. 
Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef ACTION_TRMM +#define ACTION_TRMM +#include "utilities.h" +#include "STL_interface.hh" +#include +#include "init/init_function.hh" +#include "init/init_vector.hh" +#include "init/init_matrix.hh" + +using namespace std; + +template +class Action_trmm { + +public : + + // Ctor + + Action_trmm( int size ):_size(size) + { + MESSAGE("Action_trmm Ctor"); + + // STL matrix and vector initialization + + init_matrix(A_stl,_size); + init_matrix(B_stl,_size); + init_matrix(X_stl,_size); + init_matrix(resu_stl,_size); + + for (int j=0; j<_size; ++j) + { + for (int i=0; i::matrix_matrix_product(A_stl,B_stl,X_stl,_size); +// +// typename Interface::real_type error= +// STL_interface::norm_diff(X_stl,resu_stl); +// +// if (error>1.e-6){ +// INFOS("WRONG CALCULATION...residual=" << error); +// // exit(1); +// } + + } + +private : + + typename Interface::stl_matrix A_stl; + typename Interface::stl_matrix B_stl; + typename Interface::stl_matrix X_stl; + typename Interface::stl_matrix resu_stl; + + typename Interface::gene_matrix A_ref; + typename Interface::gene_matrix B_ref; + typename Interface::gene_matrix X_ref; + + typename 
Interface::gene_matrix A; + typename Interface::gene_matrix B; + typename Interface::gene_matrix X; + + int _size; + double _cost; + +}; + + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/actions/basic_actions.hh b/thirdparty/eigen/bench/btl/actions/basic_actions.hh new file mode 100644 index 000000000..a3333ea26 --- /dev/null +++ b/thirdparty/eigen/bench/btl/actions/basic_actions.hh @@ -0,0 +1,21 @@ + +#include "action_axpy.hh" +#include "action_axpby.hh" + +#include "action_matrix_vector_product.hh" +#include "action_atv_product.hh" + +#include "action_matrix_matrix_product.hh" +// #include "action_ata_product.hh" +#include "action_aat_product.hh" + +#include "action_trisolve.hh" +#include "action_trmm.hh" +#include "action_symv.hh" +// #include "action_symm.hh" +#include "action_syr2.hh" +#include "action_ger.hh" +#include "action_rot.hh" + +// #include "action_lu_solve.hh" + diff --git a/thirdparty/eigen/bench/btl/cmake/FindACML.cmake b/thirdparty/eigen/bench/btl/cmake/FindACML.cmake new file mode 100644 index 000000000..4989fa2f4 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindACML.cmake @@ -0,0 +1,51 @@ + +if (ACML_LIBRARIES) + set(ACML_FIND_QUIETLY TRUE) +endif (ACML_LIBRARIES) + +find_library(ACML_LIBRARIES + NAMES + acml_mp acml_mv + PATHS + $ENV{ACMLDIR}/lib + $ENV{ACML_DIR}/lib + ${LIB_INSTALL_DIR} +) + +find_file(ACML_LIBRARIES + NAMES + libacml_mp.so + PATHS + /usr/lib + /usr/lib64 + $ENV{ACMLDIR}/lib + ${LIB_INSTALL_DIR} +) + +if(NOT ACML_LIBRARIES) + message(STATUS "Multi-threaded library not found, looking for single-threaded") + find_library(ACML_LIBRARIES + NAMES + acml acml_mv + PATHS + $ENV{ACMLDIR}/lib + $ENV{ACML_DIR}/lib + ${LIB_INSTALL_DIR} + ) + find_file(ACML_LIBRARIES + libacml.so libacml_mv.so + PATHS + /usr/lib + /usr/lib64 + $ENV{ACMLDIR}/lib + ${LIB_INSTALL_DIR} + ) +endif() + + + + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ACML DEFAULT_MSG ACML_LIBRARIES) + 
+mark_as_advanced(ACML_LIBRARIES) diff --git a/thirdparty/eigen/bench/btl/cmake/FindATLAS.cmake b/thirdparty/eigen/bench/btl/cmake/FindATLAS.cmake new file mode 100644 index 000000000..4136a989d --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindATLAS.cmake @@ -0,0 +1,31 @@ + +if (ATLAS_LIBRARIES) + set(ATLAS_FIND_QUIETLY TRUE) +endif (ATLAS_LIBRARIES) + +find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LIB satlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +find_file(ATLAS_LAPACK NAMES liblapack_atlas.so.3 liblapack.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LAPACK NAMES lapack_atlas lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) + +if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) + + set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_LIB}) + + # search the default lapack lib link to it + find_file(ATLAS_REFERENCE_LAPACK liblapack.so.3 PATHS /usr/lib /usr/lib64) + find_library(ATLAS_REFERENCE_LAPACK NAMES lapack) +# if(ATLAS_REFERENCE_LAPACK) +# set(ATLAS_LIBRARIES ${ATLAS_LIBRARIES} ${ATLAS_REFERENCE_LAPACK}) +# endif() + +endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ATLAS DEFAULT_MSG ATLAS_LIBRARIES) + +mark_as_advanced(ATLAS_LIBRARIES) diff --git a/thirdparty/eigen/bench/btl/cmake/FindBLAZE.cmake b/thirdparty/eigen/bench/btl/cmake/FindBLAZE.cmake new file mode 100644 index 000000000..dba4c89f2 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindBLAZE.cmake @@ -0,0 +1,31 @@ +# - Try to find blaze headers +# Once done this will define +# +# BLAZE_FOUND - system has 
blaze lib +# BLAZE_INCLUDE_DIR - the blaze include directory +# +# Copyright (C) 2008 Gael Guennebaud +# Adapted from FindEigen.cmake: +# Copyright (c) 2006, 2007 Montel Laurent, +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +if (BLAZE_INCLUDE_DIR) + + # in cache already + set(BLAZE_FOUND TRUE) + +else (BLAZE_INCLUDE_DIR) + +find_path(BLAZE_INCLUDE_DIR NAMES blaze/Blaze.h + PATHS + ${INCLUDE_INSTALL_DIR} + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(BLAZE DEFAULT_MSG BLAZE_INCLUDE_DIR) + +mark_as_advanced(BLAZE_INCLUDE_DIR) + +endif(BLAZE_INCLUDE_DIR) + diff --git a/thirdparty/eigen/bench/btl/cmake/FindBlitz.cmake b/thirdparty/eigen/bench/btl/cmake/FindBlitz.cmake new file mode 100644 index 000000000..92880bbed --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindBlitz.cmake @@ -0,0 +1,40 @@ +# - Try to find blitz lib +# Once done this will define +# +# BLITZ_FOUND - system has blitz lib +# BLITZ_INCLUDES - the blitz include directory +# BLITZ_LIBRARIES - The libraries needed to use blitz + +# Copyright (c) 2006, Montel Laurent, +# Copyright (c) 2007, Allen Winter, +# Copyright (C) 2008 Gael Guennebaud +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+ +# include(FindLibraryWithDebug) + +if (BLITZ_INCLUDES AND BLITZ_LIBRARIES) + set(Blitz_FIND_QUIETLY TRUE) +endif (BLITZ_INCLUDES AND BLITZ_LIBRARIES) + +find_path(BLITZ_INCLUDES + NAMES + blitz/array.h + PATH_SUFFIXES blitz* + PATHS + $ENV{BLITZDIR}/include + ${INCLUDE_INSTALL_DIR} +) + +find_library(BLITZ_LIBRARIES + blitz + PATHS + $ENV{BLITZDIR}/lib + ${LIB_INSTALL_DIR} +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Blitz DEFAULT_MSG + BLITZ_INCLUDES BLITZ_LIBRARIES) + +mark_as_advanced(BLITZ_INCLUDES BLITZ_LIBRARIES) diff --git a/thirdparty/eigen/bench/btl/cmake/FindCBLAS.cmake b/thirdparty/eigen/bench/btl/cmake/FindCBLAS.cmake new file mode 100644 index 000000000..ce0f2f2b2 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindCBLAS.cmake @@ -0,0 +1,35 @@ +# include(FindLibraryWithDebug) + +if (CBLAS_INCLUDES AND CBLAS_LIBRARIES) + set(CBLAS_FIND_QUIETLY TRUE) +endif (CBLAS_INCLUDES AND CBLAS_LIBRARIES) + +find_path(CBLAS_INCLUDES + NAMES + cblas.h + PATHS + $ENV{CBLASDIR}/include + ${INCLUDE_INSTALL_DIR} +) + +find_library(CBLAS_LIBRARIES + cblas + PATHS + $ENV{CBLASDIR}/lib + ${LIB_INSTALL_DIR} +) + +find_file(CBLAS_LIBRARIES + libcblas.so.3 + PATHS + /usr/lib + /usr/lib64 + $ENV{CBLASDIR}/lib + ${LIB_INSTALL_DIR} +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(CBLAS DEFAULT_MSG + CBLAS_INCLUDES CBLAS_LIBRARIES) + +mark_as_advanced(CBLAS_INCLUDES CBLAS_LIBRARIES) diff --git a/thirdparty/eigen/bench/btl/cmake/FindGMM.cmake b/thirdparty/eigen/bench/btl/cmake/FindGMM.cmake new file mode 100644 index 000000000..5049c64ed --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindGMM.cmake @@ -0,0 +1,17 @@ +if (GMM_INCLUDE_DIR) + # in cache already + set(GMM_FOUND TRUE) +else (GMM_INCLUDE_DIR) + +find_path(GMM_INCLUDE_DIR NAMES gmm/gmm.h + PATHS + ${INCLUDE_INSTALL_DIR} + ${GMM_INCLUDE_PATH} + ) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(GMM DEFAULT_MSG 
GMM_INCLUDE_DIR ) + +mark_as_advanced(GMM_INCLUDE_DIR) + +endif(GMM_INCLUDE_DIR) diff --git a/thirdparty/eigen/bench/btl/cmake/FindMKL.cmake b/thirdparty/eigen/bench/btl/cmake/FindMKL.cmake new file mode 100644 index 000000000..f4d7c6ebe --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindMKL.cmake @@ -0,0 +1,65 @@ + +if (MKL_LIBRARIES) + set(MKL_FIND_QUIETLY TRUE) +endif (MKL_LIBRARIES) + +if(CMAKE_MINOR_VERSION GREATER 4) + +if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") + +find_library(MKL_LIBRARIES + mkl_core + PATHS + $ENV{MKLLIB} + /opt/intel/mkl/*/lib/em64t + /opt/intel/Compiler/*/*/mkl/lib/em64t + ${LIB_INSTALL_DIR} +) + +find_library(MKL_GUIDE + guide + PATHS + $ENV{MKLLIB} + /opt/intel/mkl/*/lib/em64t + /opt/intel/Compiler/*/*/mkl/lib/em64t + /opt/intel/Compiler/*/*/lib/intel64 + ${LIB_INSTALL_DIR} +) + +if(MKL_LIBRARIES AND MKL_GUIDE) + set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel_lp64 mkl_sequential ${MKL_GUIDE} pthread) +endif() + +else(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") + +find_library(MKL_LIBRARIES + mkl_core + PATHS + $ENV{MKLLIB} + /opt/intel/mkl/*/lib/32 + /opt/intel/Compiler/*/*/mkl/lib/32 + ${LIB_INSTALL_DIR} +) + +find_library(MKL_GUIDE + guide + PATHS + $ENV{MKLLIB} + /opt/intel/mkl/*/lib/32 + /opt/intel/Compiler/*/*/mkl/lib/32 + /opt/intel/Compiler/*/*/lib/intel32 + ${LIB_INSTALL_DIR} +) + +if(MKL_LIBRARIES AND MKL_GUIDE) + set(MKL_LIBRARIES ${MKL_LIBRARIES} mkl_intel mkl_sequential ${MKL_GUIDE} pthread) +endif() + +endif(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") + +endif(CMAKE_MINOR_VERSION GREATER 4) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARIES) + +mark_as_advanced(MKL_LIBRARIES) diff --git a/thirdparty/eigen/bench/btl/cmake/FindMTL4.cmake b/thirdparty/eigen/bench/btl/cmake/FindMTL4.cmake new file mode 100644 index 000000000..3de490980 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindMTL4.cmake @@ -0,0 +1,31 @@ +# - Try to find eigen2 
headers +# Once done this will define +# +# MTL4_FOUND - system has MTL4 lib +# MTL4_INCLUDE_DIR - the MTL4 include directory +# +# Copyright (C) 2008 Gael Guennebaud +# Adapted from FindEigen.cmake: +# Copyright (c) 2006, 2007 Montel Laurent, +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +if (MTL4_INCLUDE_DIR) + + # in cache already + set(MTL4_FOUND TRUE) + +else (MTL4_INCLUDE_DIR) + +find_path(MTL4_INCLUDE_DIR NAMES boost/numeric/mtl/mtl.hpp + PATHS + ${INCLUDE_INSTALL_DIR} + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(MTL4 DEFAULT_MSG MTL4_INCLUDE_DIR) + +mark_as_advanced(MTL4_INCLUDE_DIR) + +endif(MTL4_INCLUDE_DIR) + diff --git a/thirdparty/eigen/bench/btl/cmake/FindOPENBLAS.cmake b/thirdparty/eigen/bench/btl/cmake/FindOPENBLAS.cmake new file mode 100644 index 000000000..2a0919436 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindOPENBLAS.cmake @@ -0,0 +1,17 @@ + +if (OPENBLAS_LIBRARIES) + set(OPENBLAS_FIND_QUIETLY TRUE) +endif (OPENBLAS_LIBRARIES) + +find_file(OPENBLAS_LIBRARIES NAMES libopenblas.so libopenblas.so.0 PATHS /usr/lib /usr/lib64 $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) +find_library(OPENBLAS_LIBRARIES openblas PATHS $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) + +if(OPENBLAS_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) + set(OPENBLAS_LIBRARIES ${OPENBLAS_LIBRARIES} "-lpthread -lgfortran") +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(OPENBLAS DEFAULT_MSG + OPENBLAS_LIBRARIES) + +mark_as_advanced(OPENBLAS_LIBRARIES) diff --git a/thirdparty/eigen/bench/btl/cmake/FindPackageHandleStandardArgs.cmake b/thirdparty/eigen/bench/btl/cmake/FindPackageHandleStandardArgs.cmake new file mode 100644 index 000000000..7f122edcd --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindPackageHandleStandardArgs.cmake @@ -0,0 +1,60 @@ +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom 
failure message") VAR1 ... ) +# +# This macro is intended to be used in FindXXX.cmake modules files. +# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and +# it also sets the _FOUND variable. +# The package is found if all variables listed are TRUE. +# Example: +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) +# +# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and +# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. +# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, +# independent whether QUIET was used or not. +# +# If it is found, the location is reported using the VAR1 argument, so +# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. +# If the second argument is DEFAULT_MSG, the message in the failure case will +# be "Could NOT find LibXml2", if you don't like this message you can specify +# your own custom failure message there. + +MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 ) + + IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + IF (${_NAME}_FIND_REQUIRED) + SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}") + ELSE (${_NAME}_FIND_REQUIRED) + SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}") + ENDIF (${_NAME}_FIND_REQUIRED) + ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + SET(_FAIL_MESSAGE "${_FAIL_MSG}") + ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + + STRING(TOUPPER ${_NAME} _NAME_UPPER) + + SET(${_NAME_UPPER}_FOUND TRUE) + IF(NOT ${_VAR1}) + SET(${_NAME_UPPER}_FOUND FALSE) + ENDIF(NOT ${_VAR1}) + + FOREACH(_CURRENT_VAR ${ARGN}) + IF(NOT ${_CURRENT_VAR}) + SET(${_NAME_UPPER}_FOUND FALSE) + ENDIF(NOT ${_CURRENT_VAR}) + ENDFOREACH(_CURRENT_VAR) + + IF (${_NAME_UPPER}_FOUND) + IF (NOT ${_NAME}_FIND_QUIETLY) + MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}") + ENDIF (NOT ${_NAME}_FIND_QUIETLY) + ELSE (${_NAME_UPPER}_FOUND) + IF (${_NAME}_FIND_REQUIRED) + MESSAGE(FATAL_ERROR 
"${_FAIL_MESSAGE}") + ELSE (${_NAME}_FIND_REQUIRED) + IF (NOT ${_NAME}_FIND_QUIETLY) + MESSAGE(STATUS "${_FAIL_MESSAGE}") + ENDIF (NOT ${_NAME}_FIND_QUIETLY) + ENDIF (${_NAME}_FIND_REQUIRED) + ENDIF (${_NAME_UPPER}_FOUND) +ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS) diff --git a/thirdparty/eigen/bench/btl/cmake/FindTvmet.cmake b/thirdparty/eigen/bench/btl/cmake/FindTvmet.cmake new file mode 100644 index 000000000..26a29d965 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/FindTvmet.cmake @@ -0,0 +1,32 @@ +# - Try to find tvmet headers +# Once done this will define +# +# TVMET_FOUND - system has tvmet lib +# TVMET_INCLUDE_DIR - the tvmet include directory +# +# Copyright (C) 2008 Gael Guennebaud +# Adapted from FindEigen.cmake: +# Copyright (c) 2006, 2007 Montel Laurent, +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +if (TVMET_INCLUDE_DIR) + + # in cache already + set(TVMET_FOUND TRUE) + +else (TVMET_INCLUDE_DIR) + +find_path(TVMET_INCLUDE_DIR NAMES tvmet/tvmet.h + PATHS + ${TVMETDIR}/ + ${INCLUDE_INSTALL_DIR} + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Tvmet DEFAULT_MSG TVMET_INCLUDE_DIR) + +mark_as_advanced(TVMET_INCLUDE_DIR) + +endif(TVMET_INCLUDE_DIR) + diff --git a/thirdparty/eigen/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake b/thirdparty/eigen/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake new file mode 100644 index 000000000..545048b68 --- /dev/null +++ b/thirdparty/eigen/bench/btl/cmake/MacroOptionalAddSubdirectory.cmake @@ -0,0 +1,31 @@ +# - MACRO_OPTIONAL_ADD_SUBDIRECTORY() combines ADD_SUBDIRECTORY() with an OPTION() +# MACRO_OPTIONAL_ADD_SUBDIRECTORY(
) +# If you use MACRO_OPTIONAL_ADD_SUBDIRECTORY() instead of ADD_SUBDIRECTORY(), +# this will have two effects +# 1 - CMake will not complain if the directory doesn't exist +# This makes sense if you want to distribute just one of the subdirs +# in a source package, e.g. just one of the subdirs in kdeextragear. +# 2 - If the directory exists, it will offer an option to skip the +# subdirectory. +# This is useful if you want to compile only a subset of all +# directories. + +# Copyright (c) 2007, Alexander Neundorf, +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + + +MACRO (MACRO_OPTIONAL_ADD_SUBDIRECTORY _dir ) + GET_FILENAME_COMPONENT(_fullPath ${_dir} ABSOLUTE) + IF(EXISTS ${_fullPath}) + IF(${ARGC} EQUAL 2) + OPTION(BUILD_${_dir} "Build directory ${_dir}" ${ARGV1}) + ELSE(${ARGC} EQUAL 2) + OPTION(BUILD_${_dir} "Build directory ${_dir}" TRUE) + ENDIF(${ARGC} EQUAL 2) + IF(BUILD_${_dir}) + ADD_SUBDIRECTORY(${_dir}) + ENDIF(BUILD_${_dir}) + ENDIF(EXISTS ${_fullPath}) +ENDMACRO (MACRO_OPTIONAL_ADD_SUBDIRECTORY) diff --git a/thirdparty/eigen/bench/btl/data/CMakeLists.txt b/thirdparty/eigen/bench/btl/data/CMakeLists.txt new file mode 100644 index 000000000..6af2a366f --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/CMakeLists.txt @@ -0,0 +1,32 @@ + +ADD_CUSTOM_TARGET(copy_scripts) + +SET(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh + perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh ) + +FOREACH(script_file ${script_files}) +ADD_CUSTOM_COMMAND( + TARGET copy_scripts + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${script_file} ${CMAKE_CURRENT_BINARY_DIR}/ + ARGS +) +ENDFOREACH(script_file) + +ADD_CUSTOM_COMMAND( + TARGET copy_scripts + POST_BUILD + COMMAND ${CMAKE_CXX_COMPILER} --version | head -n 1 > ${CMAKE_CURRENT_BINARY_DIR}/compiler_version.txt + ARGS +) +ADD_CUSTOM_COMMAND( + TARGET copy_scripts + 
POST_BUILD + COMMAND echo "${Eigen_SOURCE_DIR}" > ${CMAKE_CURRENT_BINARY_DIR}/eigen_root_dir.txt + ARGS +) + +add_executable(smooth smooth.cxx) +add_executable(regularize regularize.cxx) +add_executable(main mean.cxx) +add_dependencies(main copy_scripts) diff --git a/thirdparty/eigen/bench/btl/data/action_settings.txt b/thirdparty/eigen/bench/btl/data/action_settings.txt new file mode 100644 index 000000000..39d2b5dc4 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/action_settings.txt @@ -0,0 +1,19 @@ +aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000 +ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000 +atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000 +axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000 +axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000 +matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000 +matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000 +trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000 +trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000 +trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000 +cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000 +complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000 +partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000 +tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000 +hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000 +symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000 +syr2 ; "{/*1.5 symmetric rank-2 update (A += u v^T + v u^T)}" ; "matrix size" ; 4:5000 +ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000 +rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000 diff --git a/thirdparty/eigen/bench/btl/data/gnuplot_common_settings.hh 
b/thirdparty/eigen/bench/btl/data/gnuplot_common_settings.hh new file mode 100644 index 000000000..6f677df60 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/gnuplot_common_settings.hh @@ -0,0 +1,87 @@ +set noclip points +set clip one +set noclip two +set bar 1.000000 +set border 31 lt -1 lw 1.000 +set xdata +set ydata +set zdata +set x2data +set y2data +set boxwidth +set dummy x,y +set format x "%g" +set format y "%g" +set format x2 "%g" +set format y2 "%g" +set format z "%g" +set angles radians +set nogrid +set key title "" +set key left top Right noreverse box linetype -2 linewidth 1.000 samplen 4 spacing 1 width 0 +set nolabel +set noarrow +# set nolinestyle # deprecated +set nologscale +set logscale x 10 +set offsets 0, 0, 0, 0 +set pointsize 1 +set encoding default +set nopolar +set noparametric +set view 60, 30, 1, 1 +set samples 100, 100 +set isosamples 10, 10 +set surface +set nocontour +set clabel '%8.3g' +set mapping cartesian +set nohidden3d +set cntrparam order 4 +set cntrparam linear +set cntrparam levels auto 5 +set cntrparam points 5 +set size ratio 0 1,1 +set origin 0,0 +# set data style lines +# set function style lines +set xzeroaxis lt -2 lw 1.000 +set x2zeroaxis lt -2 lw 1.000 +set yzeroaxis lt -2 lw 1.000 +set y2zeroaxis lt -2 lw 1.000 +set tics in +set ticslevel 0.5 +set tics scale 1, 0.5 +set mxtics default +set mytics default +set mx2tics default +set my2tics default +set xtics border mirror norotate autofreq +set ytics border mirror norotate autofreq +set ztics border nomirror norotate autofreq +set nox2tics +set noy2tics +set timestamp "" bottom norotate offset 0,0 +set rrange [ * : * ] noreverse nowriteback # (currently [-0:10] ) +set trange [ * : * ] noreverse nowriteback # (currently [-5:5] ) +set urange [ * : * ] noreverse nowriteback # (currently [-5:5] ) +set vrange [ * : * ] noreverse nowriteback # (currently [-5:5] ) +set xlabel "matrix size" offset 0,0 +set x2label "" offset 0,0 +set timefmt "%d/%m/%y\n%H:%M" +set xrange [ 10 
: 1000 ] noreverse nowriteback +set x2range [ * : * ] noreverse nowriteback # (currently [-10:10] ) +set ylabel "MFLOPS" offset 0,0 +set y2label "" offset 0,0 +set yrange [ * : * ] noreverse nowriteback # (currently [-10:10] ) +set y2range [ * : * ] noreverse nowriteback # (currently [-10:10] ) +set zlabel "" offset 0,0 +set zrange [ * : * ] noreverse nowriteback # (currently [-10:10] ) +set zero 1e-08 +set lmargin -1 +set bmargin -1 +set rmargin -1 +set tmargin -1 +set locale "C" +set xrange [4:1024] + diff --git a/thirdparty/eigen/bench/btl/data/go_mean b/thirdparty/eigen/bench/btl/data/go_mean new file mode 100755 index 000000000..42338ca27 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/go_mean @@ -0,0 +1,58 @@ +#!/bin/bash + +if [ $# -lt 1 ]; then # numeric test: a bare < inside [ ] is an input redirection, not a comparison + echo "Usage: $0 working_directory [tiny|large [prefix]]" +else + +mkdir -p $1 +##cp ../libs/*/*.dat $1 + +mode=large +if [ $# -ge 2 ]; then # override the default mode only when a 2nd argument was given + mode=$2 +fi +if [ $# -ge 3 ]; then # optional 3rd argument is the prefix + prefix=$3 +fi + +EIGENDIR=`cat eigen_root_dir.txt` + +webpagefilename=$1/index.html +meanstatsfilename=$1/mean.html + +echo '' > $meanstatsfilename +echo '' > $webpagefilename +echo '

Configuration' >> $webpagefilename +echo '

    '\ + '
  • ' `cat /proc/cpuinfo | grep "model name" | head -n 1`\ + ' (' `uname -m` ')
  • '\ + '
  • compiler: ' `cat compiler_version.txt` '
  • '\ + '
  • eigen3: ' `hg identify -i $EIGENDIR` '
  • '\ + '
' \ + '

' >> $webpagefilename + +source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix +source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix +source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix +# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh partial_lu_decomp $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix +source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh ger $1 11 50 300 1000 $mode $prefix +source mk_mean_script.sh rot $1 11 2500 100000 250000 $mode $prefix +source mk_mean_script.sh complete_lu_decomp $1 11 100 300 1000 $mode $prefix + +fi + +## compile the web page ## + +#echo `cat footer.html` >> $webpagefilename \ No newline at end of file diff --git a/thirdparty/eigen/bench/btl/data/mean.cxx b/thirdparty/eigen/bench/btl/data/mean.cxx new file mode 100644 index 000000000..c567ef33e --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/mean.cxx @@ -0,0 +1,182 @@ +//===================================================== +// File : mean.cxx +// Author : L. 
Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include +#include +#include +#include +#include "bench_parameter.hh" +#include "utils/xy_file.hh" +#include + +using namespace std; + +double mean_calc(const vector & tab_sizes, const vector & tab_mflops, const int size_min, const int size_max); + +class Lib_Mean{ + +public: + Lib_Mean( void ):_lib_name(),_mean_in_cache(),_mean_out_of_cache(){ + MESSAGE("Lib_mean Default Ctor"); + MESSAGE("!!! 
should not be used"); + exit(0); + } + Lib_Mean(const string & name, const double & mic, const double & moc):_lib_name(name),_mean_in_cache(mic),_mean_out_of_cache(moc){ + MESSAGE("Lib_mean Ctor"); + } + Lib_Mean(const Lib_Mean & lm):_lib_name(lm._lib_name),_mean_in_cache(lm._mean_in_cache),_mean_out_of_cache(lm._mean_out_of_cache){ + MESSAGE("Lib_mean Copy Ctor"); + } + ~Lib_Mean( void ){ + MESSAGE("Lib_mean Dtor"); + } + + double _mean_in_cache; + double _mean_out_of_cache; + string _lib_name; + + bool operator < ( const Lib_Mean &right) const + { + //return ( this->_mean_out_of_cache > right._mean_out_of_cache) ; + return ( this->_mean_in_cache > right._mean_in_cache) ; + } + +}; + + +int main( int argc , char *argv[] ) +{ + + if (argc<6){ + INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2..."); + exit(0); + } + INFOS(argc); + + int min_in_cache=atoi(argv[2]); + int max_in_cache=atoi(argv[3]); + int min_out_of_cache=atoi(argv[4]); + int max_out_of_cache=atoi(argv[5]); + + + multiset s_lib_mean ; + + for (int i=6;i tab_sizes; + vector tab_mflops; + + read_xy_file(filename,tab_sizes,tab_mflops); + + mic=mean_calc(tab_sizes,tab_mflops,min_in_cache,max_in_cache); + moc=mean_calc(tab_sizes,tab_mflops,min_out_of_cache,max_out_of_cache); + + Lib_Mean cur_lib_mean(filename,mic,moc); + + s_lib_mean.insert(cur_lib_mean); + + } + + } + + + cout << "" << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + + multiset::iterator is = s_lib_mean.begin(); + Lib_Mean best(*is); + + + for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){ + + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + cout << " " << endl ; + + } + + cout << "
" << argv[1] << " in cache
mean perf
Mflops
in cache
% best
out of cache
mean perf
Mflops
out of cache
% best
details comments
" << is->_lib_name << " " << is->_mean_in_cache << " " << 100*(is->_mean_in_cache/best._mean_in_cache) << " " << is->_mean_out_of_cache << " " << 100*(is->_mean_out_of_cache/best._mean_out_of_cache) << " " << + "_lib_name<<"_"<snippet/" + "_lib_name<<"_flags\">flags " << + "_lib_name<<"_comments\">click here
" << endl ; + + ofstream output_file ("../order_lib",ios::out) ; + + for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){ + output_file << is->_lib_name << endl ; + } + + output_file.close(); + +} + +double mean_calc(const vector & tab_sizes, const vector & tab_mflops, const int size_min, const int size_max){ + + int size=tab_sizes.size(); + int nb_sample=0; + double mean=0.0; + + for (int i=0;i=size_min)&&(tab_sizes[i]<=size_max)){ + + nb_sample++; + mean+=tab_mflops[i]; + + } + + + } + + if (nb_sample==0){ + INFOS("no data for mean calculation"); + return 0.0; + } + + return mean/nb_sample; +} + + + + diff --git a/thirdparty/eigen/bench/btl/data/mk_gnuplot_script.sh b/thirdparty/eigen/bench/btl/data/mk_gnuplot_script.sh new file mode 100644 index 000000000..2ca7b5cb5 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/mk_gnuplot_script.sh @@ -0,0 +1,68 @@ +#! /bin/bash +WHAT=$1 +DIR=$2 +echo $WHAT script generation +cat $WHAT.hh > $WHAT.gnuplot + +DATA_FILE=`find $DIR -name "*.dat" | grep $WHAT` + +echo plot \\ >> $WHAT.gnuplot + +for FILE in $DATA_FILE +do + LAST=$FILE +done + +echo LAST=$LAST + +for FILE in $DATA_FILE +do + if [ $FILE != $LAST ] + then + BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} + echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot + fi +done +BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} +echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot + +#echo set term postscript color >> $WHAT.gnuplot +#echo set output "'"$WHAT.ps"'" >> $WHAT.gnuplot +echo set term pbm small color >> $WHAT.gnuplot +echo set output "'"$WHAT.ppm"'" >> $WHAT.gnuplot +echo plot \\ >> $WHAT.gnuplot + +for FILE in $DATA_FILE +do + if [ $FILE != $LAST ] + then + BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} + echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot + fi +done 
+BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} +echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot + +echo set term jpeg large >> $WHAT.gnuplot +echo set output "'"$WHAT.jpg"'" >> $WHAT.gnuplot +echo plot \\ >> $WHAT.gnuplot + +for FILE in $DATA_FILE +do + if [ $FILE != $LAST ] + then + BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} + echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot + fi +done +BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} +echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot + + +gnuplot -persist < $WHAT.gnuplot + +rm $WHAT.gnuplot + + + + diff --git a/thirdparty/eigen/bench/btl/data/mk_mean_script.sh b/thirdparty/eigen/bench/btl/data/mk_mean_script.sh new file mode 100644 index 000000000..b10df0240 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/mk_mean_script.sh @@ -0,0 +1,52 @@ +#! /bin/bash +WHAT=$1 +DIR=$2 +MINIC=$3 +MAXIC=$4 +MINOC=$5 +MAXOC=$6 +prefix=$8 + +meanstatsfilename=$2/mean.html + +WORK_DIR=tmp +mkdir $WORK_DIR + +DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}` + +if [ -n "$DATA_FILE" ]; then + + echo "" + echo "$1..." + for FILE in $DATA_FILE + do + ##echo hello world + ##echo "mk_mean_script1" ${FILE} + BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} + + ##echo "mk_mean_script1" ${TITLE} + cp $FILE ${WORK_DIR}/${TITLE} + + done + + cd $WORK_DIR + ../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename + ../mk_new_gnuplot.sh $1 $2 $7 + rm -f *.gnuplot + cd .. + + echo '
' >> $meanstatsfilename + + webpagefilename=$2/index.html + # echo '

'${WHAT}'

' >> $webpagefilename + echo '
'${WHAT}'
' >> $webpagefilename + +fi + +rm -R $WORK_DIR + + + + + + diff --git a/thirdparty/eigen/bench/btl/data/mk_new_gnuplot.sh b/thirdparty/eigen/bench/btl/data/mk_new_gnuplot.sh new file mode 100755 index 000000000..fad3b23a4 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/mk_new_gnuplot.sh @@ -0,0 +1,54 @@ +#!/bin/bash +WHAT=$1 +DIR=$2 + +cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot + +echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot +echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` " offset 0,0" >> $WHAT.gnuplot +echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot + +if [ $# > 3 ]; then + if [ "$3" == "tiny" ]; then + echo "set xrange [2:16]" >> $WHAT.gnuplot + echo "set nologscale" >> $WHAT.gnuplot + fi +fi + + + +DATA_FILE=`cat ../order_lib` +echo set term postscript color rounded enhanced >> $WHAT.gnuplot +echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot + +# echo set term svg color rounded enhanced >> $WHAT.gnuplot +# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot +# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot + +echo plot \\ >> $WHAT.gnuplot + +for FILE in $DATA_FILE +do + LAST=$FILE +done + +for FILE in $DATA_FILE +do + BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat} + + echo "'"$FILE"'" `grep $TITLE ../perlib_plot_settings.txt | head -n 1 | cut -d ";" -f 2` "\\" >> $WHAT.gnuplot + if [ $FILE != $LAST ] + then + echo ", \\" >> $WHAT.gnuplot + fi +done +echo " " >> $WHAT.gnuplot + +gnuplot -persist < $WHAT.gnuplot + +rm $WHAT.gnuplot + +ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf +convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 ../${DIR}/$WHAT.ps -background white -flatten ../${DIR}/$WHAT.png + +# pstoedit -rotate -90 -xscale 0.8 
-yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg diff --git a/thirdparty/eigen/bench/btl/data/perlib_plot_settings.txt b/thirdparty/eigen/bench/btl/data/perlib_plot_settings.txt new file mode 100644 index 000000000..f023cfe02 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/perlib_plot_settings.txt @@ -0,0 +1,16 @@ +eigen3 ; with lines lw 4 lt 1 lc rgbcolor "black" +eigen2 ; with lines lw 3 lt 1 lc rgbcolor "#999999" +EigenBLAS ; with lines lw 3 lt 3 lc rgbcolor "#999999" +eigen3_novec ; with lines lw 2 lt 1 lc rgbcolor "#999999" +eigen3_nogccvec ; with lines lw 2 lt 2 lc rgbcolor "#991010" +INTEL_MKL ; with lines lw 3 lt 1 lc rgbcolor "#ff0000" +ATLAS ; with lines lw 3 lt 1 lc rgbcolor "#008000" +gmm ; with lines lw 3 lt 1 lc rgbcolor "#0000ff" +ublas ; with lines lw 3 lt 1 lc rgbcolor "#00b7ff" +mtl4 ; with lines lw 3 lt 1 lc rgbcolor "#d18847" +blitz ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff" +F77 ; with lines lw 3 lt 3 lc rgbcolor "#e6e64c" +OPENBLAS ; with lines lw 3 lt 1 lc rgbcolor "#C05600" +C ; with lines lw 3 lt 3 lc rgbcolor "#e6bd96" +ACML ; with lines lw 2 lt 3 lc rgbcolor "#e6e64c" +blaze ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff" diff --git a/thirdparty/eigen/bench/btl/data/regularize.cxx b/thirdparty/eigen/bench/btl/data/regularize.cxx new file mode 100644 index 000000000..eea2b8b85 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/regularize.cxx @@ -0,0 +1,131 @@ +//===================================================== +// File : regularize.cxx +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include +#include +#include +#include +#include "bench_parameter.hh" +#include + +using namespace std; + +void read_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops); +void regularize_curve(const string & filename, + const vector & tab_mflops, + const vector & tab_sizes, + int start_cut_size, int stop_cut_size); +///////////////////////////////////////////////////////////////////////////////////////////////// + +int main( int argc , char *argv[] ) +{ + + // input data + + if (argc<4){ + INFOS("!!! Error ... 
usage : main filename start_cut_size stop_cut_size regularize_filename"); + exit(0); + } + INFOS(argc); + + int start_cut_size=atoi(argv[2]); + int stop_cut_size=atoi(argv[3]); + + string filename=argv[1]; + string regularize_filename=argv[4]; + + INFOS(filename); + INFOS("start_cut_size="< tab_sizes; + vector tab_mflops; + + read_xy_file(filename,tab_sizes,tab_mflops); + + // regularizeing + + regularize_curve(regularize_filename,tab_mflops,tab_sizes,start_cut_size,stop_cut_size); + + +} + +////////////////////////////////////////////////////////////////////////////////////// + +void regularize_curve(const string & filename, + const vector & tab_mflops, + const vector & tab_sizes, + int start_cut_size, int stop_cut_size) +{ + int size=tab_mflops.size(); + ofstream output_file (filename.c_str(),ios::out) ; + + int i=0; + + while(tab_sizes[i] & tab_sizes, vector & tab_mflops){ + + ifstream input_file (filename.c_str(),ios::in) ; + + if (!input_file){ + INFOS("!!! Error opening "<> size >> mflops ){ + nb_point++; + tab_sizes.push_back(size); + tab_mflops.push_back(mflops); + } + SCRUTE(nb_point); + + input_file.close(); +} + diff --git a/thirdparty/eigen/bench/btl/data/smooth.cxx b/thirdparty/eigen/bench/btl/data/smooth.cxx new file mode 100644 index 000000000..e5270cc32 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/smooth.cxx @@ -0,0 +1,198 @@ +//===================================================== +// File : smooth.cxx +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include +#include +#include +#include +#include +#include "bench_parameter.hh" +#include + +using namespace std; + +void read_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops); +void write_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops); +void smooth_curve(const vector & tab_mflops, vector & smooth_tab_mflops,int window_half_width); +void centered_smooth_curve(const vector & tab_mflops, vector & smooth_tab_mflops,int window_half_width); + +///////////////////////////////////////////////////////////////////////////////////////////////// + +int main( int argc , char *argv[] ) +{ + + // input data + + if (argc<3){ + INFOS("!!! Error ... 
usage : main filename window_half_width smooth_filename"); + exit(0); + } + INFOS(argc); + + int window_half_width=atoi(argv[2]); + + string filename=argv[1]; + string smooth_filename=argv[3]; + + INFOS(filename); + INFOS("window_half_width="< tab_sizes; + vector tab_mflops; + + read_xy_file(filename,tab_sizes,tab_mflops); + + // smoothing + + vector smooth_tab_mflops; + + //smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width); + centered_smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width); + + // output result + + write_xy_file(smooth_filename,tab_sizes,smooth_tab_mflops); + + +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template +double weighted_mean(const VECTOR & data) +{ + + double mean=0.0; + + for (int i=0 ; i & tab_mflops, vector & smooth_tab_mflops,int window_half_width){ + + int window_width=2*window_half_width+1; + + int size=tab_mflops.size(); + + vector sample(window_width); + + for (int i=0 ; i < size ; i++){ + + for ( int j=0 ; j < window_width ; j++ ){ + + int shifted_index=i+j-window_half_width; + if (shifted_index<0) shifted_index=0; + if (shifted_index>size-1) shifted_index=size-1; + sample[j]=tab_mflops[shifted_index]; + + } + + smooth_tab_mflops.push_back(weighted_mean(sample)); + + } + +} + +void centered_smooth_curve(const vector & tab_mflops, vector & smooth_tab_mflops,int window_half_width){ + + int max_window_width=2*window_half_width+1; + + int size=tab_mflops.size(); + + + for (int i=0 ; i < size ; i++){ + + deque sample; + + + sample.push_back(tab_mflops[i]); + + for ( int j=1 ; j <= window_half_width ; j++ ){ + + int before=i-j; + int after=i+j; + + if ((before>=0)&&(after & tab_sizes, vector & tab_mflops){ + + ofstream output_file (filename.c_str(),ios::out) ; + + for (int i=0 ; i < tab_sizes.size() ; i++) + { + output_file << tab_sizes[i] << " " << tab_mflops[i] << endl ; + } + + output_file.close(); + +} + + 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void read_xy_file(const string & filename, vector & tab_sizes, vector & tab_mflops){ + + ifstream input_file (filename.c_str(),ios::in) ; + + if (!input_file){ + INFOS("!!! Error opening "<> size >> mflops ){ + nb_point++; + tab_sizes.push_back(size); + tab_mflops.push_back(mflops); + } + SCRUTE(nb_point); + + input_file.close(); +} + diff --git a/thirdparty/eigen/bench/btl/data/smooth_all.sh b/thirdparty/eigen/bench/btl/data/smooth_all.sh new file mode 100755 index 000000000..3e5bfdf47 --- /dev/null +++ b/thirdparty/eigen/bench/btl/data/smooth_all.sh @@ -0,0 +1,68 @@ +#! /bin/bash +ORIG_DIR=$1 +SMOOTH_DIR=${ORIG_DIR}_smooth +mkdir ${SMOOTH_DIR} + +AXPY_FILE=`find ${ORIG_DIR} -name "*.dat" | grep axpy` +for FILE in ${AXPY_FILE} +do + echo $FILE + BASE=${FILE##*/} + ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp + ./regularize ${SMOOTH_DIR}/${BASE}_tmp 2500 15000 ${SMOOTH_DIR}/${BASE} + rm -f ${SMOOTH_DIR}/${BASE}_tmp +done + + +MATRIX_VECTOR_FILE=`find ${ORIG_DIR} -name "*.dat" | grep matrix_vector` +for FILE in ${MATRIX_VECTOR_FILE} +do + echo $FILE + BASE=${FILE##*/} + ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp + ./regularize ${SMOOTH_DIR}/${BASE}_tmp 50 180 ${SMOOTH_DIR}/${BASE} + rm -f ${SMOOTH_DIR}/${BASE}_tmp +done + +MATRIX_MATRIX_FILE=`find ${ORIG_DIR} -name "*.dat" | grep matrix_matrix` +for FILE in ${MATRIX_MATRIX_FILE} +do + echo $FILE + BASE=${FILE##*/} + ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE} +done + +AAT_FILE=`find ${ORIG_DIR} -name "*.dat" | grep _aat` +for FILE in ${AAT_FILE} +do + echo $FILE + BASE=${FILE##*/} + ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE} +done + + +ATA_FILE=`find ${ORIG_DIR} -name "*.dat" | grep _ata` +for FILE in ${ATA_FILE} +do + echo $FILE + BASE=${FILE##*/} + ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE} +done + +### no smoothing for tinyvector and 
matrices libs + +TINY_BLITZ_FILE=`find ${ORIG_DIR} -name "*.dat" | grep tiny_blitz` +for FILE in ${TINY_BLITZ_FILE} +do + echo $FILE + BASE=${FILE##*/} + cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE} +done + +TVMET_FILE=`find ${ORIG_DIR} -name "*.dat" | grep tvmet` +for FILE in ${TVMET_FILE} +do + echo $FILE + BASE=${FILE##*/} + cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE} +done diff --git a/thirdparty/eigen/bench/btl/generic_bench/bench.hh b/thirdparty/eigen/bench/btl/generic_bench/bench.hh new file mode 100644 index 000000000..7b7b951b5 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/bench.hh @@ -0,0 +1,168 @@ +//===================================================== +// File : bench.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef BENCH_HH +#define BENCH_HH + +#include "btl.hh" +#include "bench_parameter.hh" +#include +#include "utilities.h" +#include "size_lin_log.hh" +#include "xy_file.hh" +#include +#include +#include "timers/portable_perf_analyzer.hh" +// #include "timers/mixed_perf_analyzer.hh" +// #include "timers/x86_perf_analyzer.hh" +// #include "timers/STL_perf_analyzer.hh" +#ifdef HAVE_MKL +extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int); +#endif +using namespace std; + +template class Perf_Analyzer, class Action> +BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) +{ + if (BtlConfig::skipAction(Action::name())) + return; + + string filename="bench_"+Action::name()+".dat"; + + INFOS("starting " < tab_mflops(nb_point); + std::vector tab_sizes(nb_point); + + // matrices and vector size calculations + size_lin_log(nb_point,size_min,size_max,tab_sizes); + + std::vector oldSizes; + std::vector oldFlops; + bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true); + int oldi = oldSizes.size() - 1; + + // loop on matrix size + Perf_Analyzer perf_action; + for (int i=nb_point-1;i>=0;i--) + { + //INFOS("size=" <=0 && oldSizes[oldi]>tab_sizes[i]) + --oldi; + if (oldi>=0 && oldSizes[oldi]==tab_sizes[i]) + { + if (oldFlops[oldi] "; + else + std::cout << "\t < "; + std::cout << oldFlops[oldi]; + } + --oldi; + } + std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl; + } + + if (!BtlConfig::Instance.overwriteResults) + { + if (hasOldResults) + { + // merge the two data + std::vector newSizes; + std::vector newFlops; + unsigned int i=0; + unsigned int j=0; + while (i +BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){ + + // if the rdtsc is not available : + bench(size_min,size_max,nb_point); + // if the rdtsc is available : +// bench(size_min,size_max,nb_point); + + + // Only for small problem size. 
Otherwize it will be too long +// bench(size_min,size_max,nb_point); +// bench(size_min,size_max,nb_point); + +} + +#endif diff --git a/thirdparty/eigen/bench/btl/generic_bench/bench_parameter.hh b/thirdparty/eigen/bench/btl/generic_bench/bench_parameter.hh new file mode 100644 index 000000000..2b01149f9 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/bench_parameter.hh @@ -0,0 +1,53 @@ +//===================================================== +// File : bench_parameter.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef BENCH_PARAMETER_HH +#define BENCH_PARAMETER_HH + +// minimal time for each measurement +#define REAL_TYPE float +// minimal time for each measurement +#define MIN_TIME 0.2 +// nb of point on bench curves +#define NB_POINT 100 +// min vector size for axpy bench +#define MIN_AXPY 5 +// max vector size for axpy bench +#define MAX_AXPY 3000000 +// min matrix size for matrix vector product bench +#define MIN_MV 5 +// max matrix size for matrix vector product bench +#define MAX_MV 5000 +// min matrix size for matrix matrix product bench +#define MIN_MM 5 +// max matrix size for matrix matrix product bench +#define MAX_MM MAX_MV +// min matrix size for LU bench +#define MIN_LU 5 +// max matrix size for LU bench +#define MAX_LU 3000 +// max size for tiny vector and matrix +#define TINY_MV_MAX_SIZE 16 +// default nb_sample for x86 timer +#define DEFAULT_NB_SAMPLE 1000 + +// how many times we run a single bench (keep the best perf) +#define DEFAULT_NB_TRIES 3 + +#endif diff --git a/thirdparty/eigen/bench/btl/generic_bench/btl.hh b/thirdparty/eigen/bench/btl/generic_bench/btl.hh new file mode 100644 index 000000000..706b00fb0 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/btl.hh @@ -0,0 +1,242 @@ +//===================================================== +// File : btl.hh +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BTL_HH +#define BTL_HH + +#include "bench_parameter.hh" +#include +#include +#include +#include +#include "utilities.h" + +#if (defined __GNUC__) +#define BTL_ALWAYS_INLINE __attribute__((always_inline)) inline +#else +#define BTL_ALWAYS_INLINE inline +#endif + +#if (defined __GNUC__) +#define BTL_DONT_INLINE __attribute__((noinline)) +#else +#define BTL_DONT_INLINE +#endif + +#if (defined __GNUC__) +#define BTL_ASM_COMMENT(X) asm("#" X) +#else +#define BTL_ASM_COMMENT(X) +#endif + +#ifdef __SSE__ +#include "xmmintrin.h" +// This enables flush to zero (FTZ) and denormals are zero (DAZ) modes: +#define BTL_DISABLE_SSE_EXCEPTIONS() { _mm_setcsr(_mm_getcsr() | 0x8040); } +#else +#define BTL_DISABLE_SSE_EXCEPTIONS() +#endif + +/** Enhanced std::string +*/ +class BtlString : public std::string +{ +public: + BtlString() : std::string() {} + BtlString(const BtlString& str) : std::string(static_cast(str)) {} + BtlString(const std::string& str) : std::string(str) {} + BtlString(const char* str) : std::string(str) {} + + operator const char* () const { return c_str(); } + + void trim( bool left = true, bool right = true ) + { + int lspaces, rspaces, len = length(), i; + lspaces = rspaces = 0; + + if ( left ) + for (i=0; i=0 && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); rspaces++,i--); + + *this = substr(lspaces, len-lspaces-rspaces); + } + + std::vector split( const BtlString& delims = "\t\n ") const + { + std::vector ret; + unsigned int numSplits = 0; + size_t start, pos; + start = 0; + do + { + pos = find_first_of(delims, start); + if (pos == start) + { + ret.push_back(""); + start = pos + 1; + } + else if (pos == npos) + ret.push_back( substr(start) ); + else + { + ret.push_back( substr(start, pos - start) ); + start = pos + 1; + } + 
//start = find_first_not_of(delims, start); + ++numSplits; + } while (pos != npos); + return ret; + } + + bool endsWith(const BtlString& str) const + { + if(str.size()>this->size()) + return false; + return this->substr(this->size()-str.size(),str.size()) == str; + } + bool contains(const BtlString& str) const + { + return this->find(str)size(); + } + bool beginsWith(const BtlString& str) const + { + if(str.size()>this->size()) + return false; + return this->substr(0,str.size()) == str; + } + + BtlString toLowerCase( void ) + { + std::transform(begin(), end(), begin(), static_cast(::tolower) ); + return *this; + } + BtlString toUpperCase( void ) + { + std::transform(begin(), end(), begin(), static_cast(::toupper) ); + return *this; + } + + /** Case insensitive comparison. + */ + bool isEquiv(const BtlString& str) const + { + BtlString str0 = *this; + str0.toLowerCase(); + BtlString str1 = str; + str1.toLowerCase(); + return str0 == str1; + } + + /** Decompose the current string as a path and a file. + For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext" + */ + void decomposePathAndFile(BtlString& path, BtlString& filename) const + { + std::vector elements = this->split("/\\"); + path = ""; + filename = elements.back(); + elements.pop_back(); + if (this->at(0)=='/') + path = "/"; + for (unsigned int i=0 ; i config = BtlString(_config).split(" \t\n"); + for (unsigned int i = 0; i m_selectedActionNames; +}; + +#define BTL_MAIN \ + BtlConfig BtlConfig::Instance + +#endif // BTL_HH diff --git a/thirdparty/eigen/bench/btl/generic_bench/init/init_function.hh b/thirdparty/eigen/bench/btl/generic_bench/init/init_function.hh new file mode 100644 index 000000000..e467cb648 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/init/init_function.hh @@ -0,0 +1,54 @@ +//===================================================== +// File : init_function.hh +// Author : L. 
Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef INIT_FUNCTION_HH +#define INIT_FUNCTION_HH + +double simple_function(int index) +{ + return index; +} + +double simple_function(int index_i, int index_j) +{ + return index_i+index_j; +} + +double pseudo_random(int /*index*/) +{ + return std::rand()/double(RAND_MAX); +} + +double pseudo_random(int /*index_i*/, int /*index_j*/) +{ + return std::rand()/double(RAND_MAX); +} + + +double null_function(int /*index*/) +{ + return 0.0; +} + +double null_function(int /*index_i*/, int /*index_j*/) +{ + return 0.0; +} + +#endif diff --git a/thirdparty/eigen/bench/btl/generic_bench/init/init_matrix.hh b/thirdparty/eigen/bench/btl/generic_bench/init/init_matrix.hh new file mode 100644 index 000000000..6382d30c8 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/init/init_matrix.hh @@ -0,0 +1,64 @@ +//===================================================== +// File : init_matrix.hh +// Author : L. 
Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef INIT_MATRIX_HH +#define INIT_MATRIX_HH + +// The Vector class must satisfy the following part of STL vector concept : +// resize() method +// [] operator for setting element +// value_type defined +template +BTL_DONT_INLINE void init_row(Vector & X, int size, int row){ + + X.resize(size); + + for (unsigned int j=0;j +BTL_DONT_INLINE void init_matrix(Vector & A, int size){ + A.resize(size); + for (unsigned int row=0; row(A[row],size,row); + } +} + +template +BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size){ + A.resize(size); + for (unsigned int row=0; row +// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef INIT_VECTOR_HH +#define INIT_VECTOR_HH + +// The Vector class must satisfy the following part of STL vector concept : +// resize() method +// [] operator for setting element +// value_type defined +template +void init_vector(Vector & X, int size){ + + X.resize(size); + + for (unsigned int i=0;i +// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef BENCH_STATIC_HH +#define BENCH_STATIC_HH + +#include "btl.hh" +#include "bench_parameter.hh" +#include +#include "utilities.h" +#include "xy_file.hh" +#include "static/static_size_generator.hh" +#include "timers/portable_perf_analyzer.hh" +// #include "timers/mixed_perf_analyzer.hh" +// #include "timers/x86_perf_analyzer.hh" + +using namespace std; + + +template class Perf_Analyzer, template class Action, template class Interface> +BTL_DONT_INLINE void bench_static(void) +{ + if (BtlConfig::skipAction(Action >::name())) + return; + + string filename = "bench_" + Action >::name() + ".dat"; + + INFOS("starting " << filename); + + const int max_size = TINY_MV_MAX_SIZE; + + std::vector tab_mflops; + std::vector tab_sizes; + + static_size_generator::go(tab_sizes,tab_mflops); + + dump_xy_file(tab_sizes,tab_mflops,filename); +} + +// default Perf Analyzer +template class Action, template class Interface> +BTL_DONT_INLINE void bench_static(void) +{ + bench_static(); + //bench_static(); + //bench_static(); +} + +#endif + + + + + + + + + + + + + + + diff --git a/thirdparty/eigen/bench/btl/generic_bench/static/intel_bench_fixed_size.hh b/thirdparty/eigen/bench/btl/generic_bench/static/intel_bench_fixed_size.hh new file mode 100644 index 000000000..b4edcbc46 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/static/intel_bench_fixed_size.hh @@ -0,0 +1,66 @@ +//===================================================== +// File : intel_bench_fixed_size.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, mar dc 3 18:59:37 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef _BENCH_FIXED_SIZE_HH_ +#define _BENCH_FIXED_SIZE_HH_ + +#include "utilities.h" +#include "function_time.hh" + +template +double bench_fixed_size(int size, unsigned long long & nb_calc,unsigned long long & nb_init) +{ + + Action action(size); + + double time_baseline=time_init(nb_init,action); + + while (time_baseline < MIN_TIME) { + + //INFOS("nb_init="< > > perf_action; + tab_mflops.push_back(perf_action.eval_mflops(SIZE)); + std::cout << tab_mflops.back() << " MFlops" << std::endl; + static_size_generator::go(tab_sizes,tab_mflops); + }; +}; + +//recursion end + +template class Perf_Analyzer, template class Action, template class Interface> +struct static_size_generator<1,Perf_Analyzer,Action,Interface>{ + static void go(vector & tab_sizes, vector & tab_mflops) + { + tab_sizes.push_back(1); + Perf_Analyzer > > perf_action; + tab_mflops.push_back(perf_action.eval_mflops(1)); + }; +}; + +#endif + + + + diff --git a/thirdparty/eigen/bench/btl/generic_bench/timers/STL_perf_analyzer.hh b/thirdparty/eigen/bench/btl/generic_bench/timers/STL_perf_analyzer.hh new file mode 100644 index 000000000..c9f894b1f --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/timers/STL_perf_analyzer.hh @@ -0,0 +1,82 @@ +//===================================================== +// File : STL_perf_analyzer.hh +// Author : L. 
Plagne +// Copyright (C) EDF R&D, mar dc 3 18:59:35 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef _STL_PERF_ANALYSER_HH +#define _STL_PERF_ANALYSER_HH + +#include "STL_timer.hh" +#include "bench_parameter.hh" + +template +class STL_Perf_Analyzer{ +public: + STL_Perf_Analyzer(unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos() + { + MESSAGE("STL_Perf_Analyzer Ctor"); + }; + STL_Perf_Analyzer( const STL_Perf_Analyzer & ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~STL_Perf_Analyzer( void ){ + MESSAGE("STL_Perf_Analyzer Dtor"); + }; + + + inline double eval_mflops(int size) + { + + ACTION action(size); + + _chronos.start_baseline(_nb_sample); + + do { + + action.initialize(); + } while (_chronos.check()); + + double baseline_time=_chronos.get_time(); + + _chronos.start(_nb_sample); + do { + action.initialize(); + action.calculate(); + } while (_chronos.check()); + + double calculate_time=_chronos.get_time(); + + double corrected_time=calculate_time-baseline_time; + + // cout << size <<" "< +// Copyright (C) EDF R&D, mar dc 3 18:59:35 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it 
under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +// STL Timer Class. Adapted (L.P.) from the timer class by Musser et Al +// described int the Book : STL Tutorial and reference guide. +// Define a timer class for analyzing algorithm performance. +#include +#include +#include +#include +#include +using namespace std; + +class STL_Timer { +public: + STL_Timer(){ baseline = false; }; // Default constructor + // Start a series of r trials: + void start(unsigned int r){ + reps = r; + count = 0; + iterations.clear(); + iterations.reserve(reps); + initial = time(0); + }; + // Start a series of r trials to determine baseline time: + void start_baseline(unsigned int r) + { + baseline = true; + start(r); + } + // Returns true if the trials have been completed, else false + bool check() + { + ++count; + final = time(0); + if (initial < final) { + iterations.push_back(count); + initial = final; + count = 0; + } + return (iterations.size() < reps); + }; + // Returns the results for external use + double get_time( void ) + { + sort(iterations.begin(), iterations.end()); + return 1.0/iterations[reps/2]; + }; +private: + unsigned int reps; // Number of trials + // For storing loop iterations of a trial + vector iterations; + // For saving initial and final times of a trial + time_t initial, final; + // For counting loop iterations of a trial + unsigned long count; + // true if this is a 
baseline computation, false otherwise + bool baseline; + // For recording the baseline time + double baseline_time; +}; + diff --git a/thirdparty/eigen/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh b/thirdparty/eigen/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh new file mode 100644 index 000000000..e190236e0 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/timers/mixed_perf_analyzer.hh @@ -0,0 +1,73 @@ +//===================================================== +// File : mixed_perf_analyzer.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, mar dc 3 18:59:36 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef _MIXED_PERF_ANALYSER_HH +#define _MIXED_PERF_ANALYSER_HH + +#include "x86_perf_analyzer.hh" +#include "portable_perf_analyzer.hh" + +// choose portable perf analyzer for long calculations and x86 analyser for short ones + + +template +class Mixed_Perf_Analyzer{ + +public: + Mixed_Perf_Analyzer( void ):_x86pa(),_ppa(),_use_ppa(true) + { + MESSAGE("Mixed_Perf_Analyzer Ctor"); + }; + Mixed_Perf_Analyzer( const Mixed_Perf_Analyzer & ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~Mixed_Perf_Analyzer( void ){ + MESSAGE("Mixed_Perf_Analyzer Dtor"); + }; + + + inline double eval_mflops(int size) + { + + double result=0.0; + if (_use_ppa){ + result=_ppa.eval_mflops(size); + if (_ppa.get_nb_calc()>DEFAULT_NB_SAMPLE){_use_ppa=false;} + } + else{ + result=_x86pa.eval_mflops(size); + } + + return result; + } + +private: + + Portable_Perf_Analyzer _ppa; + X86_Perf_Analyzer _x86pa; + bool _use_ppa; + +}; + +#endif + + + + diff --git a/thirdparty/eigen/bench/btl/generic_bench/timers/portable_perf_analyzer.hh b/thirdparty/eigen/bench/btl/generic_bench/timers/portable_perf_analyzer.hh new file mode 100644 index 000000000..5e579fb49 --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/timers/portable_perf_analyzer.hh @@ -0,0 +1,103 @@ +//===================================================== +// File : portable_perf_analyzer.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, mar d�c 3 18:59:35 CET 2002 +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef _PORTABLE_PERF_ANALYZER_HH +#define _PORTABLE_PERF_ANALYZER_HH + +#include "utilities.h" +#include "timers/portable_timer.hh" + +template +class Portable_Perf_Analyzer{ +public: + Portable_Perf_Analyzer( ):_nb_calc(0), m_time_action(0), _chronos(){ + MESSAGE("Portable_Perf_Analyzer Ctor"); + }; + Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~Portable_Perf_Analyzer(){ + MESSAGE("Portable_Perf_Analyzer Dtor"); + }; + + BTL_DONT_INLINE double eval_mflops(int size) + { + Action action(size); + +// action.initialize(); +// time_action = time_calculate(action); + while (m_time_action < MIN_TIME) + { + if(_nb_calc==0) _nb_calc = 1; + else _nb_calc *= 2; + action.initialize(); + m_time_action = time_calculate(action); + } + + // optimize + for (int i=1; i +// Copyright (C) EDF R&D, mar d�c 3 18:59:35 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef _PORTABLE_PERF_ANALYZER_HH +#define _PORTABLE_PERF_ANALYZER_HH + +#include "utilities.h" +#include "timers/portable_timer.hh" + +template +class Portable_Perf_Analyzer{ +public: + Portable_Perf_Analyzer( void ):_nb_calc(1),_nb_init(1),_chronos(){ + MESSAGE("Portable_Perf_Analyzer Ctor"); + }; + Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~Portable_Perf_Analyzer( void ){ + MESSAGE("Portable_Perf_Analyzer Dtor"); + }; + + + + inline double eval_mflops(int size) + { + + Action action(size); + +// double time_baseline = time_init(action); +// while (time_baseline < MIN_TIME_INIT) +// { +// _nb_init *= 2; +// time_baseline = time_init(action); +// } +// +// // optimize +// for (int i=1; i +#include + + +class Portable_Timer +{ + public: + + Portable_Timer() + { + } + + void start() + { + m_start_time = double(mach_absolute_time())*1e-9;; + + } + + void stop() + { + m_stop_time = double(mach_absolute_time())*1e-9;; + + } + + double elapsed() + { + return user_time(); + } + + double user_time() + { + return m_stop_time - m_start_time; + } + + +private: + + double m_stop_time, m_start_time; + +}; // Portable_Timer (Apple) + +#else + +#include +#include +#include +#include + +class Portable_Timer +{ + public: + + Portable_Timer() + { + m_clkid = BtlConfig::Instance.realclock ? 
CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID; + } + + Portable_Timer(int clkid) : m_clkid(clkid) + {} + + void start() + { + timespec ts; + clock_gettime(m_clkid, &ts); + m_start_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); + + } + + void stop() + { + timespec ts; + clock_gettime(m_clkid, &ts); + m_stop_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); + + } + + double elapsed() + { + return user_time(); + } + + double user_time() + { + return m_stop_time - m_start_time; + } + + +private: + + int m_clkid; + double m_stop_time, m_start_time; + +}; // Portable_Timer (Linux) + +#endif + +#endif // PORTABLE_TIMER_HPP diff --git a/thirdparty/eigen/bench/btl/generic_bench/timers/x86_perf_analyzer.hh b/thirdparty/eigen/bench/btl/generic_bench/timers/x86_perf_analyzer.hh new file mode 100644 index 000000000..37ea21dcc --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/timers/x86_perf_analyzer.hh @@ -0,0 +1,108 @@ +//===================================================== +// File : x86_perf_analyzer.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, mar d�c 3 18:59:35 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef _X86_PERF_ANALYSER_HH +#define _X86_PERF_ANALYSER_HH + +#include "x86_timer.hh" +#include "bench_parameter.hh" + +template +class X86_Perf_Analyzer{ +public: + X86_Perf_Analyzer( unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos() + { + MESSAGE("X86_Perf_Analyzer Ctor"); + _chronos.find_frequency(); + }; + X86_Perf_Analyzer( const X86_Perf_Analyzer & ){ + INFOS("Copy Ctor not implemented"); + exit(0); + }; + ~X86_Perf_Analyzer( void ){ + MESSAGE("X86_Perf_Analyzer Dtor"); + }; + + + inline double eval_mflops(int size) + { + + ACTION action(size); + + int nb_loop=5; + double calculate_time=0.0; + double baseline_time=0.0; + + for (int j=0 ; j < nb_loop ; j++){ + + _chronos.clear(); + + for(int i=0 ; i < _nb_sample ; i++) + { + _chronos.start(); + action.initialize(); + action.calculate(); + _chronos.stop(); + _chronos.add_get_click(); + } + + calculate_time += double(_chronos.get_shortest_clicks())/_chronos.frequency(); + + if (j==0) action.check_result(); + + _chronos.clear(); + + for(int i=0 ; i < _nb_sample ; i++) + { + _chronos.start(); + action.initialize(); + _chronos.stop(); + _chronos.add_get_click(); + + } + + baseline_time+=double(_chronos.get_shortest_clicks())/_chronos.frequency(); + + } + + double corrected_time = (calculate_time-baseline_time)/double(nb_loop); + + +// INFOS("_nb_sample="<<_nb_sample); +// INFOS("baseline_time="< +// Copyright (C) EDF R&D, mar d�c 3 18:59:35 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef _X86_TIMER_HH +#define _X86_TIMER_HH + +#include +#include +#include +#include +//#include "system_time.h" +#define u32 unsigned int +#include +#include "utilities.h" +#include +#include +#include +#include + +// frequence de la becanne en Hz +//#define FREQUENCY 648000000 +//#define FREQUENCY 1400000000 +#define FREQUENCY 1695000000 + +using namespace std; + + +class X86_Timer { + +public : + + X86_Timer( void ):_frequency(FREQUENCY),_nb_sample(0) + { + MESSAGE("X86_Timer Default Ctor"); + } + + inline void start( void ){ + + rdtsc(_click_start.n32[0],_click_start.n32[1]); + + } + + + inline void stop( void ){ + + rdtsc(_click_stop.n32[0],_click_stop.n32[1]); + + } + + + inline double frequency( void ){ + return _frequency; + } + + double get_elapsed_time_in_second( void ){ + + return (_click_stop.n64-_click_start.n64)/double(FREQUENCY); + + + } + + unsigned long long get_click( void ){ + + return (_click_stop.n64-_click_start.n64); + + } + + inline void find_frequency( void ){ + + time_t initial, final; + int dummy=2; + + initial = time(0); + start(); + do { + dummy+=2; + } + while(time(0)==initial); + // On est au debut d'un cycle d'une seconde !!! 
+ initial = time(0); + start(); + do { + dummy+=2; + } + while(time(0)==initial); + final=time(0); + stop(); + // INFOS("fine grained time : "<< get_elapsed_time_in_second()); + // INFOS("coarse grained time : "<< final-initial); + _frequency=_frequency*get_elapsed_time_in_second()/double(final-initial); + /// INFOS("CPU frequency : "<< _frequency); + + } + + void add_get_click( void ){ + + _nb_sample++; + _counted_clicks[get_click()]++; + fill_history_clicks(); + + } + + void dump_statistics(string filemane){ + + ofstream outfile (filemane.c_str(),ios::out) ; + + std::map::iterator itr; + for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++) + { + outfile << (*itr).first << " " << (*itr).second << endl ; + } + + outfile.close(); + + } + + void dump_history(string filemane){ + + ofstream outfile (filemane.c_str(),ios::out) ; + + + + for(int i=0 ; i<_history_mean_clicks.size() ; i++) + { + outfile << i << " " + << _history_mean_clicks[i] << " " + << _history_shortest_clicks[i] << " " + << _history_most_occured_clicks[i] << endl ; + } + + outfile.close(); + + } + + + + double get_mean_clicks( void ){ + + std::map::iterator itr; + + unsigned long long mean_clicks=0; + + for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++) + { + + mean_clicks+=(*itr).second*(*itr).first; + } + + return mean_clicks/double(_nb_sample); + + } + + double get_shortest_clicks( void ){ + + return double((*_counted_clicks.begin()).first); + + } + + void fill_history_clicks( void ){ + + _history_mean_clicks.push_back(get_mean_clicks()); + _history_shortest_clicks.push_back(get_shortest_clicks()); + _history_most_occured_clicks.push_back(get_most_occured_clicks()); + + } + + + double get_most_occured_clicks( void ){ + + unsigned long long moc=0; + unsigned long long max_occurence=0; + + std::map::iterator itr; + + for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++) + { + + if (max_occurence<=(*itr).second){ + max_occurence=(*itr).second; + 
moc=(*itr).first; + } + } + + return double(moc); + + } + + void clear( void ) + { + _counted_clicks.clear(); + + _history_mean_clicks.clear(); + _history_shortest_clicks.clear(); + _history_most_occured_clicks.clear(); + + _nb_sample=0; + } + + + +private : + + union + { + unsigned long int n32[2] ; + unsigned long long n64 ; + } _click_start; + + union + { + unsigned long int n32[2] ; + unsigned long long n64 ; + } _click_stop; + + double _frequency ; + + map _counted_clicks; + + vector _history_mean_clicks; + vector _history_shortest_clicks; + vector _history_most_occured_clicks; + + unsigned long long _nb_sample; + + + +}; + + +#endif diff --git a/thirdparty/eigen/bench/btl/generic_bench/utils/size_lin_log.hh b/thirdparty/eigen/bench/btl/generic_bench/utils/size_lin_log.hh new file mode 100644 index 000000000..bbc9f543d --- /dev/null +++ b/thirdparty/eigen/bench/btl/generic_bench/utils/size_lin_log.hh @@ -0,0 +1,70 @@ +//===================================================== +// File : size_lin_log.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, mar dc 3 18:59:37 CET 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef SIZE_LIN_LOG +#define SIZE_LIN_LOG + +#include "size_log.hh" + +template +void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector & X) +{ + int ten=10; + int nine=9; + + X.resize(nb_point); + + if (nb_point>ten){ + + for (int i=0;i +void size_log(const int nb_point, const int size_min, const int size_max, Vector & X) +{ + X.resize(nb_point); + + float ls_min=log(float(size_min)); + float ls_max=log(float(size_max)); + + float ls=0.0; + + float delta_ls=(ls_max-ls_min)/(float(nb_point-1)); + + int size=0; + + for (int i=0;i +//# include ok for gcc3.01 +# include + +/* --- INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */ + +# define HEREWEARE cout< +// Copyright (C) EDF R&D, lun sep 30 14:23:20 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef XY_FILE_HH +#define XY_FILE_HH +#include +#include +#include +#include +using namespace std; + +bool read_xy_file(const std::string & filename, std::vector & tab_sizes, + std::vector & tab_mflops, bool quiet = false) +{ + + std::ifstream input_file (filename.c_str(),std::ios::in); + + if (!input_file){ + if (!quiet) { + INFOS("!!! 
Error opening "<> size >> mflops ){ + nb_point++; + tab_sizes.push_back(size); + tab_mflops.push_back(mflops); + } + SCRUTE(nb_point); + + input_file.close(); + return true; +} + +// The Vector class must satisfy the following part of STL vector concept : +// resize() method +// [] operator for seting element +// the vector element must have the << operator define + +using namespace std; + +template +void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){ + + ofstream outfile (filename.c_str(),ios::out) ; + int size=X.size(); + + for (int i=0;i BLASFUNC(cdotu) (int *, float *, int *, float *, int *); +std::complex BLASFUNC(cdotc) (int *, float *, int *, float *, int *); +std::complex BLASFUNC(zdotu) (int *, double *, int *, double *, int *); +std::complex BLASFUNC(zdotc) (int *, double *, int *, double *, int *); +double BLASFUNC(xdotu) (int *, double *, int *, double *, int *); +double BLASFUNC(xdotc) (int *, double *, int *, double *, int *); +#endif + +int BLASFUNC(cdotuw) (int *, float *, int *, float *, int *, float*); +int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*); +int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*); +int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*); + +int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *); +int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *); +int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *); +int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *); +int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *); +int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *); + +int BLASFUNC(scopy) (int *, float *, int *, 
float *, int *); +int BLASFUNC(dcopy) (int *, double *, int *, double *, int *); +int BLASFUNC(qcopy) (int *, double *, int *, double *, int *); +int BLASFUNC(ccopy) (int *, float *, int *, float *, int *); +int BLASFUNC(zcopy) (int *, double *, int *, double *, int *); +int BLASFUNC(xcopy) (int *, double *, int *, double *, int *); + +int BLASFUNC(sswap) (int *, float *, int *, float *, int *); +int BLASFUNC(dswap) (int *, double *, int *, double *, int *); +int BLASFUNC(qswap) (int *, double *, int *, double *, int *); +int BLASFUNC(cswap) (int *, float *, int *, float *, int *); +int BLASFUNC(zswap) (int *, double *, int *, double *, int *); +int BLASFUNC(xswap) (int *, double *, int *, double *, int *); + +float BLASFUNC(sasum) (int *, float *, int *); +float BLASFUNC(scasum)(int *, float *, int *); +double BLASFUNC(dasum) (int *, double *, int *); +double BLASFUNC(qasum) (int *, double *, int *); +double BLASFUNC(dzasum)(int *, double *, int *); +double BLASFUNC(qxasum)(int *, double *, int *); + +int BLASFUNC(isamax)(int *, float *, int *); +int BLASFUNC(idamax)(int *, double *, int *); +int BLASFUNC(iqamax)(int *, double *, int *); +int BLASFUNC(icamax)(int *, float *, int *); +int BLASFUNC(izamax)(int *, double *, int *); +int BLASFUNC(ixamax)(int *, double *, int *); + +int BLASFUNC(ismax) (int *, float *, int *); +int BLASFUNC(idmax) (int *, double *, int *); +int BLASFUNC(iqmax) (int *, double *, int *); +int BLASFUNC(icmax) (int *, float *, int *); +int BLASFUNC(izmax) (int *, double *, int *); +int BLASFUNC(ixmax) (int *, double *, int *); + +int BLASFUNC(isamin)(int *, float *, int *); +int BLASFUNC(idamin)(int *, double *, int *); +int BLASFUNC(iqamin)(int *, double *, int *); +int BLASFUNC(icamin)(int *, float *, int *); +int BLASFUNC(izamin)(int *, double *, int *); +int BLASFUNC(ixamin)(int *, double *, int *); + +int BLASFUNC(ismin)(int *, float *, int *); +int BLASFUNC(idmin)(int *, double *, int *); +int BLASFUNC(iqmin)(int *, double *, int *); 
+int BLASFUNC(icmin)(int *, float *, int *); +int BLASFUNC(izmin)(int *, double *, int *); +int BLASFUNC(ixmin)(int *, double *, int *); + +float BLASFUNC(samax) (int *, float *, int *); +double BLASFUNC(damax) (int *, double *, int *); +double BLASFUNC(qamax) (int *, double *, int *); +float BLASFUNC(scamax)(int *, float *, int *); +double BLASFUNC(dzamax)(int *, double *, int *); +double BLASFUNC(qxamax)(int *, double *, int *); + +float BLASFUNC(samin) (int *, float *, int *); +double BLASFUNC(damin) (int *, double *, int *); +double BLASFUNC(qamin) (int *, double *, int *); +float BLASFUNC(scamin)(int *, float *, int *); +double BLASFUNC(dzamin)(int *, double *, int *); +double BLASFUNC(qxamin)(int *, double *, int *); + +float BLASFUNC(smax) (int *, float *, int *); +double BLASFUNC(dmax) (int *, double *, int *); +double BLASFUNC(qmax) (int *, double *, int *); +float BLASFUNC(scmax) (int *, float *, int *); +double BLASFUNC(dzmax) (int *, double *, int *); +double BLASFUNC(qxmax) (int *, double *, int *); + +float BLASFUNC(smin) (int *, float *, int *); +double BLASFUNC(dmin) (int *, double *, int *); +double BLASFUNC(qmin) (int *, double *, int *); +float BLASFUNC(scmin) (int *, float *, int *); +double BLASFUNC(dzmin) (int *, double *, int *); +double BLASFUNC(qxmin) (int *, double *, int *); + +int BLASFUNC(sscal) (int *, float *, float *, int *); +int BLASFUNC(dscal) (int *, double *, double *, int *); +int BLASFUNC(qscal) (int *, double *, double *, int *); +int BLASFUNC(cscal) (int *, float *, float *, int *); +int BLASFUNC(zscal) (int *, double *, double *, int *); +int BLASFUNC(xscal) (int *, double *, double *, int *); +int BLASFUNC(csscal)(int *, float *, float *, int *); +int BLASFUNC(zdscal)(int *, double *, double *, int *); +int BLASFUNC(xqscal)(int *, double *, double *, int *); + +float BLASFUNC(snrm2) (int *, float *, int *); +float BLASFUNC(scnrm2)(int *, float *, int *); + +double BLASFUNC(dnrm2) (int *, double *, int *); +double 
BLASFUNC(qnrm2) (int *, double *, int *); +double BLASFUNC(dznrm2)(int *, double *, int *); +double BLASFUNC(qxnrm2)(int *, double *, int *); + +int BLASFUNC(srot) (int *, float *, int *, float *, int *, float *, float *); +int BLASFUNC(drot) (int *, double *, int *, double *, int *, double *, double *); +int BLASFUNC(qrot) (int *, double *, int *, double *, int *, double *, double *); +int BLASFUNC(csrot) (int *, float *, int *, float *, int *, float *, float *); +int BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *); +int BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *); + +int BLASFUNC(srotg) (float *, float *, float *, float *); +int BLASFUNC(drotg) (double *, double *, double *, double *); +int BLASFUNC(qrotg) (double *, double *, double *, double *); +int BLASFUNC(crotg) (float *, float *, float *, float *); +int BLASFUNC(zrotg) (double *, double *, double *, double *); +int BLASFUNC(xrotg) (double *, double *, double *, double *); + +int BLASFUNC(srotmg)(float *, float *, float *, float *, float *); +int BLASFUNC(drotmg)(double *, double *, double *, double *, double *); + +int BLASFUNC(srotm) (int *, float *, int *, float *, int *, float *); +int BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *); +int BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *); + +/* Level 2 routines */ + +int BLASFUNC(sger)(int *, int *, float *, float *, int *, + float *, int *, float *, int *); +int BLASFUNC(dger)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(qger)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(cgeru)(int *, int *, float *, float *, int *, + float *, int *, float *, int *); +int BLASFUNC(cgerc)(int *, int *, float *, float *, int *, + float *, int *, float *, int *); +int BLASFUNC(zgeru)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int 
BLASFUNC(zgerc)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(xgeru)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); +int BLASFUNC(xgerc)(int *, int *, double *, double *, int *, + double *, int *, double *, int *); + +int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *, + double *, int *); + +int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, 
int *); + +int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *, + float *, int *); +int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); +int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *, + double *, int *); + +int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(ctpmv) (char *, char *, char *, int *, float *, float *, int *); +int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *); +int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *); + +int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); + +int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *); +int BLASFUNC(ztbsv) 
(char *, char *, char *, int *, int *, double *, int *, double *, int *); +int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *); + +int BLASFUNC(ssymv) (char *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(csymv) (char *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsymv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(sspmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(dspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(qspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(cspmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(zspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(xspmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); + +int BLASFUNC(ssyr) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(dsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(qsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(csyr) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zsyr) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(xsyr) (char *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(ssyr2) (char *, int *, float *, + float *, int 
*, float *, int *, float *, int *); +int BLASFUNC(dsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(qsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(csyr2) (char *, int *, float *, + float *, int *, float *, int *, float *, int *); +int BLASFUNC(zsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(xsyr2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); + +int BLASFUNC(sspr) (char *, int *, float *, float *, int *, + float *); +int BLASFUNC(dspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(qspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(cspr) (char *, int *, float *, float *, int *, + float *); +int BLASFUNC(zspr) (char *, int *, double *, double *, int *, + double *); +int BLASFUNC(xspr) (char *, int *, double *, double *, int *, + double *); + +int BLASFUNC(sspr2) (char *, int *, float *, + float *, int *, float *, int *, float *); +int BLASFUNC(dspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(qspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(cspr2) (char *, int *, float *, + float *, int *, float *, int *, float *); +int BLASFUNC(zspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(xspr2) (char *, int *, double *, + double *, int *, double *, int *, double *); + +int BLASFUNC(cher) (char *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zher) (char *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(xher) (char *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(chpr) (char *, int *, float *, float *, int *, float *); +int BLASFUNC(zhpr) (char *, int *, double *, double *, int *, double *); +int BLASFUNC(xhpr) (char *, int *, double *, double *, 
int *, double *); + +int BLASFUNC(cher2) (char *, int *, float *, + float *, int *, float *, int *, float *, int *); +int BLASFUNC(zher2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); +int BLASFUNC(xher2) (char *, int *, double *, + double *, int *, double *, int *, double *, int *); + +int BLASFUNC(chpr2) (char *, int *, float *, + float *, int *, float *, int *, float *); +int BLASFUNC(zhpr2) (char *, int *, double *, + double *, int *, double *, int *, double *); +int BLASFUNC(xhpr2) (char *, int *, double *, + double *, int *, double *, int *, double *); + +int BLASFUNC(chemv) (char *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhemv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhemv) (char *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(chpmv) (char *, int *, float *, float *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhpmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhpmv) (char *, int *, double *, double *, + double *, int *, double *, double *, int *); + +int BLASFUNC(snorm)(char *, int *, int *, float *, int *); +int BLASFUNC(dnorm)(char *, int *, int *, double *, int *); +int BLASFUNC(cnorm)(char *, int *, int *, float *, int *); +int BLASFUNC(znorm)(char *, int *, int *, double *, int *); + +int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); 
+int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +/* Level 3 routines */ + +int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *, + float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *, + float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int 
BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); + +int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *, + float *, int *, float *, int *, float *, float *, int *); +int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); +int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *, + double *, int *, double *, int *, double *, double *, int *); + +int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *, + float *, float *, int *); +int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *, + double *, double *, int *); +int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *, + float *, float *, int *); +int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *, + double *, double *, int *); + +int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); + +int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(qtrmm)(char *, 
char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *, + float *, float *, int *, float *, int *); +int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); +int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *, + double *, double *, int *, double *, int *); + +int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *, + float *, float *, int *); +int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *, + float *, float *, int *); +int BLASFUNC(zsyrk)(char *, 
char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); + +int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); + +int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); +int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *, + double *, int *, double *, double *, int *); + +int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *, + float *, float *, int *); +int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int *); +int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *, + double *, double *, int 
*); + +int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *, + float *, int *, float *, float *, int *); +int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); +int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *, + double*, int *, double *, double *, int *); + +int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *, + double *, int *); +int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *, + float *, int *); +int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *, + double *, int *); + +int BLASFUNC(sgema)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(dgema)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); +int BLASFUNC(cgema)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(zgema)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); + +int BLASFUNC(sgems)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int BLASFUNC(dgems)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); +int BLASFUNC(cgems)(char *, char *, int *, int *, float *, + float *, int *, float *, float *, int *, float *, int *); +int 
BLASFUNC(zgems)(char *, char *, int *, int *, double *, + double *, int *, double*, double *, int *, double*, int *); + +int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *); + +int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *); +int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *); +int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *); + +int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *); +int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *); +int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *); +int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *); + +int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); +int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *); + +int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, 
float *, int *, int *); +int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *); +int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *); +int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *); + +int BLASFUNC(spotf2)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *); + +int BLASFUNC(spotrf)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *); + +int BLASFUNC(slauu2)(char *, int *, float *, int *, int *); +int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(clauu2)(char *, int *, float *, int *, int *); +int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *); +int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *); + +int BLASFUNC(slauum)(char *, int *, float *, int *, int *); +int BLASFUNC(dlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(qlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(clauum)(char *, int *, float *, int *, int *); +int BLASFUNC(zlauum)(char *, int *, double *, int *, int *); +int BLASFUNC(xlauum)(char *, int *, double *, int *, int *); + +int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *); 
+int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *); + +int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *); +int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *); +int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *); + +int BLASFUNC(spotri)(char *, int *, float *, int *, int *); +int BLASFUNC(dpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(qpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(cpotri)(char *, int *, float *, int *, int *); +int BLASFUNC(zpotri)(char *, int *, double *, int *, int *); +int BLASFUNC(xpotri)(char *, int *, double *, int *, int *); + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/BLAS/blas_interface.hh b/thirdparty/eigen/bench/btl/libs/BLAS/blas_interface.hh new file mode 100644 index 000000000..651054632 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/BLAS/blas_interface.hh @@ -0,0 +1,83 @@ +//===================================================== +// File : blas_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef blas_PRODUIT_MATRICE_VECTEUR_HH +#define blas_PRODUIT_MATRICE_VECTEUR_HH + +#include +#include +extern "C" +{ +#include "blas.h" + + // Cholesky Factorization +// void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info); +// void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info); + void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info ); + void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work, int *lwork, int *info ); + void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info ); + void dgehrd_( const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork, int *info ); + + // LU row pivoting +// void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info ); +// void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info); + // LU full pivoting + void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info ); + void dgetc2_(const int* n, double *a, const int *lda, int *ipiv, int *jpiv, int*info ); +#ifdef HAS_LAPACK +#endif +} + +#define MAKE_STRING2(S) #S +#define MAKE_STRING(S) MAKE_STRING2(S) + +#define CAT2(A,B) A##B +#define CAT(A,B) CAT2(A,B) + + +template class blas_interface; + + +static char notrans = 'N'; +static char trans = 'T'; +static 
char nonunit = 'N'; +static char lower = 'L'; +static char right = 'R'; +static char left = 'L'; +static int intone = 1; + + + +#define SCALAR float +#define SCALAR_PREFIX s +#include "blas_interface_impl.hh" +#undef SCALAR +#undef SCALAR_PREFIX + + +#define SCALAR double +#define SCALAR_PREFIX d +#include "blas_interface_impl.hh" +#undef SCALAR +#undef SCALAR_PREFIX + +#endif + + + diff --git a/thirdparty/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh b/thirdparty/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh new file mode 100644 index 000000000..fc4ba2a1f --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh @@ -0,0 +1,147 @@ + +#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_) + +template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR> +{ + +public : + + static SCALAR fone; + static SCALAR fzero; + + static inline std::string name() + { + return MAKE_STRING(CBLASNAME); + } + + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(gemv)(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone); + } + + static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone); + } + + static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N); + } + + static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){ + BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N); + } + + static inline void rot(gene_vector & A, gene_vector & B, SCALAR c, SCALAR s, int N){ + BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s); + } + + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone); + } + + static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ +
BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); + } + + static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); + } + +// static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){ +// ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N); +// } + + static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){ + BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N); + } + + static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){ + BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone); + } + + static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){ + BLAS_FUNC(scal)(&N,&b,Y,&intone); + BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone); + } + + static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ + int N2 = N*N; + BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); + char uplo = 'L'; + int info = 0; + BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info); + if(info!=0) std::cerr << "potrf_ error " << info << "\n"; + } + + static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + int N2 = N*N; + BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); + int info = 0; + int * ipiv = (int*)alloca(sizeof(int)*N); + BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info); + if(info!=0) std::cerr << "getrf_ error " << info << "\n"; + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + BLAS_FUNC(copy)(&N, B, &intone, X, &intone); + BLAS_FUNC(trsv)(&lower, &notrans, &nonunit, &N, L, &N, X, &intone); + } + + static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){ + BLAS_FUNC(copy)(&N, B, &intone, X, &intone); + BLAS_FUNC(trsm)(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N); + } + + static inline void trmm(gene_matrix & A,
gene_matrix & B, gene_matrix & /*X*/, int N){ + BLAS_FUNC(trmm)(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N); + } + + #ifdef HAS_LAPACK + + static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + int N2 = N*N; + BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); + int info = 0; + int * ipiv = (int*)alloca(sizeof(int)*N); + int * jpiv = (int*)alloca(sizeof(int)*N); + BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info); + } + + + + static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ + { + int N2 = N*N; + int inc = 1; + BLAS_FUNC(copy)(&N2, X, &inc, C, &inc); + } + int info = 0; + int ilo = 1; + int ihi = N; + int bsize = 64; + int worksize = N*bsize; + SCALAR* d = new SCALAR[N+worksize]; + BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info); + delete[] d; + } + + static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ + { + int N2 = N*N; + int inc = 1; + BLAS_FUNC(copy)(&N2, X, &inc, C, &inc); + } + char uplo = 'U'; + int info = 0; + int bsize = 64; + int worksize = N*bsize; + SCALAR* d = new SCALAR[3*N+worksize]; + BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info); + delete[] d; + } + + #endif // HAS_LAPACK + +}; + +SCALAR blas_interface<SCALAR>::fone = SCALAR(1); +SCALAR blas_interface<SCALAR>::fzero = SCALAR(0); diff --git a/thirdparty/eigen/bench/btl/libs/BLAS/c_interface_base.h b/thirdparty/eigen/bench/btl/libs/BLAS/c_interface_base.h new file mode 100644 index 000000000..de613803b --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/BLAS/c_interface_base.h @@ -0,0 +1,73 @@ + +#ifndef BTL_C_INTERFACE_BASE_H +#define BTL_C_INTERFACE_BASE_H + +#include "utilities.h" +#include <vector> + +template<class real> class c_interface_base +{ + +public: + + typedef real real_type; + typedef std::vector<real> stl_vector; + typedef std::vector<stl_vector> stl_matrix; + + typedef real* gene_matrix; + typedef real* gene_vector; + + static void free_matrix(gene_matrix & A, int /*N*/){ + delete[] A; + } + + static void
free_vector(gene_vector & B){ + delete[] B; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + int N = A_stl.size(); + A = new real[N*N]; + for (int j=0;j +// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "blas_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +#include "action_cholesky.hh" +#include "action_lu_decomp.hh" +#include "action_partial_lu.hh" +#include "action_trisolve_matrix.hh" + +#ifdef HAS_LAPACK +#include "action_hessenberg.hh" +#endif + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + + #ifdef HAS_LAPACK +// bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + #endif + + //bench > >(MIN_LU,MAX_LU,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/STL/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/STL/CMakeLists.txt new file mode 100644 index 000000000..4cfc2dcf2 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/STL/CMakeLists.txt @@ -0,0 +1,2 @@ + +btl_add_bench(btl_STL main.cpp OFF) diff --git a/thirdparty/eigen/bench/btl/libs/STL/STL_interface.hh b/thirdparty/eigen/bench/btl/libs/STL/STL_interface.hh new file mode 100644 index 000000000..ef4cc9233 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/STL/STL_interface.hh @@ -0,0 +1,244 @@ +//===================================================== +// File : STL_interface.hh +// Author : L. 
Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef STL_INTERFACE_HH +#define STL_INTERFACE_HH +#include +#include +#include "utilities.h" + +using namespace std; + +template +class STL_interface{ + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef stl_matrix gene_matrix; + + typedef stl_vector gene_vector; + + static inline std::string name( void ) + { + return "STL"; + } + + static void free_matrix(gene_matrix & /*A*/, int /*N*/){} + + static void free_vector(gene_vector & /*B*/){} + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A = A_stl; + } + + static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + B = B_stl; + } + + static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){ + B_stl = B ; + } + + + static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){ + A_stl = A ; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + for (int i=0;i=j) + { + for (int k=0;k > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > 
>(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/blaze/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/blaze/CMakeLists.txt new file mode 100644 index 000000000..e99a0855c --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blaze/CMakeLists.txt @@ -0,0 +1,13 @@ + +find_package(BLAZE) +find_package(Boost COMPONENTS system) +if (BLAZE_FOUND AND Boost_FOUND) + include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) + btl_add_bench(btl_blaze main.cpp) + # Note: The newest blaze version requires C++14. + # Ideally, we should set this depending on the version of Blaze we found + set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14) + if(BUILD_btl_blaze) + target_link_libraries(btl_blaze ${Boost_LIBRARIES}) + endif() +endif () diff --git a/thirdparty/eigen/bench/btl/libs/blaze/blaze_interface.hh b/thirdparty/eigen/bench/btl/libs/blaze/blaze_interface.hh new file mode 100644 index 000000000..ee1523944 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blaze/blaze_interface.hh @@ -0,0 +1,140 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BLAZE_INTERFACE_HH +#define BLAZE_INTERFACE_HH + +#include +#include +// using namespace blaze; + +#include + +template +class blaze_interface { + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef blaze::DynamicMatrix gene_matrix; + typedef blaze::DynamicVector gene_vector; + + static inline std::string name() { return "blaze"; } + + static void free_matrix(gene_matrix & A, int N){ + return ; + } + + static void free_vector(gene_vector & B){ + return ; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl[0].size(), A_stl.size()); + + for (int j=0; j ipvt(N); +// lu_factor(R, ipvt); +// } + +// static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ +// X = lower_trisolve(L, B); +// } + + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + cible = source; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + cible = source; + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/blaze/main.cpp b/thirdparty/eigen/bench/btl/libs/blaze/main.cpp new file mode 100644 index 000000000..80e8f4eaa --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blaze/main.cpp @@ -0,0 +1,40 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "blaze_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/blitz/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/blitz/CMakeLists.txt new file mode 100644 index 000000000..880ab7338 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blitz/CMakeLists.txt @@ -0,0 +1,17 @@ + +find_package(Blitz) + +if (BLITZ_FOUND) + include_directories(${BLITZ_INCLUDES}) + + btl_add_bench(btl_blitz btl_blitz.cpp) + if (BUILD_btl_blitz) + target_link_libraries(btl_blitz ${BLITZ_LIBRARIES}) + endif (BUILD_btl_blitz) + + btl_add_bench(btl_tiny_blitz btl_tiny_blitz.cpp OFF) + if (BUILD_btl_tiny_blitz) + target_link_libraries(btl_tiny_blitz ${BLITZ_LIBRARIES}) + endif (BUILD_btl_tiny_blitz) + +endif (BLITZ_FOUND) diff --git a/thirdparty/eigen/bench/btl/libs/blitz/blitz_LU_solve_interface.hh b/thirdparty/eigen/bench/btl/libs/blitz/blitz_LU_solve_interface.hh new file mode 100644 index 000000000..dcb9f567f --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blitz/blitz_LU_solve_interface.hh @@ -0,0 +1,192 @@ +//===================================================== +// File : blitz_LU_solve_interface.hh +// Author : 
L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:31 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BLITZ_LU_SOLVE_INTERFACE_HH +#define BLITZ_LU_SOLVE_INTERFACE_HH + +#include "blitz/array.h" +#include + +BZ_USING_NAMESPACE(blitz) + +template +class blitz_LU_solve_interface : public blitz_interface +{ + +public : + + typedef typename blitz_interface::gene_matrix gene_matrix; + typedef typename blitz_interface::gene_vector gene_vector; + + typedef blitz::Array Pivot_Vector; + + inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N) + { + + pivot.resize(N); + + } + + inline static void free_Pivot_Vector(Pivot_Vector & pivot) + { + + return; + + } + + + static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end) + { + + real somme=0.; + + for (int j=col_start ; j=big ) big = abs( LU( i, j ) ) ; + } + if( big==0. 
) { + INFOS( "blitz_LU_factor::Singular matrix" ) ; + exit( 0 ) ; + } + ImplicitScaling( i ) = 1./big ; + } + // Loop over columns of Crout's method : + for( int j=0; j=big ) { + dum = ImplicitScaling( i )*abs( theSum ) ; + big = dum ; + index_max = i ; + } + } + // Interchanging rows and the scale factor : + if( j!=index_max ) { + for( int k=0; k=0; i-- ) { + theSum = X( i ) ; + // theSum = B( i ) ; + theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ; + // theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ; + // theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ; + // Store a component of the solution vector : + X( i ) = theSum/LU( i, i ) ; + // B( i ) = theSum/LU( i, i ) ; + } + + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/blitz/blitz_interface.hh b/thirdparty/eigen/bench/btl/libs/blitz/blitz_interface.hh new file mode 100644 index 000000000..a67c47c75 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blitz/blitz_interface.hh @@ -0,0 +1,147 @@ +//===================================================== +// File : blitz_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BLITZ_INTERFACE_HH +#define BLITZ_INTERFACE_HH + +#include +#include +#include +#include +#include +#include + +BZ_USING_NAMESPACE(blitz) + +template +class blitz_interface{ + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef blitz::Array gene_matrix; + typedef blitz::Array gene_vector; +// typedef blitz::Matrix gene_matrix; +// typedef blitz::Vector gene_vector; + + static inline std::string name() { return "blitz"; } + + static void free_matrix(gene_matrix & A, int N){} + + static void free_vector(gene_vector & B){} + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl[0].size(),A_stl.size()); + for (int j=0; j(source); +// for (int i=0;i(source); + cible = source; + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/blitz/btl_blitz.cpp b/thirdparty/eigen/bench/btl/libs/blitz/btl_blitz.cpp new file mode 100644 index 000000000..16d2b5951 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blitz/btl_blitz.cpp @@ -0,0 +1,51 @@ +//===================================================== +// File : main.cpp +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "blitz_interface.hh" +#include "blitz_LU_solve_interface.hh" +#include "bench.hh" +#include "action_matrix_vector_product.hh" +#include "action_matrix_matrix_product.hh" +#include "action_axpy.hh" +#include "action_lu_solve.hh" +#include "action_ata_product.hh" +#include "action_aat_product.hh" +#include "action_atv_product.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + //bench > >(MIN_LU,MAX_LU,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/blitz/btl_tiny_blitz.cpp b/thirdparty/eigen/bench/btl/libs/blitz/btl_tiny_blitz.cpp new file mode 100644 index 000000000..9fddde752 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blitz/btl_tiny_blitz.cpp @@ -0,0 +1,38 @@ +//===================================================== +// File : main.cpp +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "tiny_blitz_interface.hh" +#include "static/bench_static.hh" +#include "action_matrix_vector_product.hh" +#include "action_matrix_matrix_product.hh" +#include "action_axpy.hh" + +BTL_MAIN; + +int main() +{ + bench_static(); + bench_static(); + bench_static(); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/blitz/tiny_blitz_interface.hh b/thirdparty/eigen/bench/btl/libs/blitz/tiny_blitz_interface.hh new file mode 100644 index 000000000..6b26db72d --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/blitz/tiny_blitz_interface.hh @@ -0,0 +1,106 @@ +//===================================================== +// File : tiny_blitz_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef TINY_BLITZ_INTERFACE_HH +#define TINY_BLITZ_INTERFACE_HH + +#include "blitz/array.h" +#include "blitz/tiny.h" +#include "blitz/tinymat.h" +#include "blitz/tinyvec.h" +#include + +#include + +BZ_USING_NAMESPACE(blitz) + +template +class tiny_blitz_interface +{ + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef TinyVector gene_vector; + typedef TinyMatrix gene_matrix; + + static inline std::string name() { return "tiny_blitz"; } + + static void free_matrix(gene_matrix & A, int N){} + + static void free_vector(gene_vector & B){} + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + for (int j=0; j +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen3_interface.hh" +#include "static/bench_static.hh" +#include "action_matrix_vector_product.hh" +#include "action_matrix_matrix_product.hh" +#include "action_axpy.hh" +#include "action_lu_solve.hh" +#include "action_ata_product.hh" +#include "action_aat_product.hh" +#include "action_atv_product.hh" +#include "action_cholesky.hh" +#include "action_trisolve.hh" + +BTL_MAIN; + +int main() +{ + + bench_static(); + bench_static(); + bench_static(); + bench_static(); + bench_static(); + bench_static(); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen2/eigen2_interface.hh b/thirdparty/eigen/bench/btl/libs/eigen2/eigen2_interface.hh new file mode 100644 index 000000000..1deabdae2 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen2/eigen2_interface.hh @@ -0,0 +1,168 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef EIGEN2_INTERFACE_HH +#define EIGEN2_INTERFACE_HH +// #include +#include +#include +#include +#include +#include +#include "btl.hh" + +using namespace Eigen; + +template +class eigen2_interface +{ + +public : + + enum {IsFixedSize = (SIZE!=Dynamic)}; + + typedef real real_type; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef Eigen::Matrix gene_matrix; + typedef Eigen::Matrix gene_vector; + + static inline std::string name( void ) + { + #if defined(EIGEN_VECTORIZE_SSE) + if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; + #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) + if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; + #else + if (SIZE==Dynamic) return "eigen2_novec"; else return "tiny_eigen2_novec"; + #endif + } + + static void free_matrix(gene_matrix & A, int N) {} + + static void free_vector(gene_vector & B) {} + + static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl[0].size(), A_stl.size()); + + for (int j=0; j().solveTriangular(B); + } + + static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){ + X = L.template marked().solveTriangular(B); + } + + static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ + C = X.llt().matrixL(); +// C = X; +// Cholesky::computeInPlace(C); +// Cholesky::computeInPlaceBlock(C); + } + + static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + C = X.lu().matrixLU(); +// C = X.inverse(); + } + + static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ + C = Tridiagonalization(X).packedMatrix(); + } + + static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ + C = HessenbergDecomposition(X).packedMatrix(); + } + + + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/eigen2/main_adv.cpp b/thirdparty/eigen/bench/btl/libs/eigen2/main_adv.cpp new 
file mode 100644 index 000000000..fe3368925 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen2/main_adv.cpp @@ -0,0 +1,44 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "action_trisolve.hh" +#include "action_trisolve_matrix.hh" +#include "action_cholesky.hh" +#include "action_hessenberg.hh" +#include "action_lu_decomp.hh" +// #include "action_partial_lu.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen2/main_linear.cpp b/thirdparty/eigen/bench/btl/libs/eigen2/main_linear.cpp new file mode 100644 index 000000000..c17d16c08 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen2/main_linear.cpp @@ -0,0 +1,34 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen2/main_matmat.cpp b/thirdparty/eigen/bench/btl/libs/eigen2/main_matmat.cpp new file mode 100644 index 000000000..cd9dc9cb0 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen2/main_matmat.cpp @@ -0,0 +1,35 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen2/main_vecmat.cpp b/thirdparty/eigen/bench/btl/libs/eigen2/main_vecmat.cpp new file mode 100644 index 000000000..8b66cd2d9 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen2/main_vecmat.cpp @@ -0,0 +1,36 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen2_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); +// bench > >(MIN_MV,MAX_MV,NB_POINT); +// bench > >(MIN_MV,MAX_MV,NB_POINT); +// bench > >(MIN_MV,MAX_MV,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/eigen3/CMakeLists.txt new file mode 100644 index 000000000..00cae23d3 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/CMakeLists.txt @@ -0,0 +1,65 @@ + + +if((NOT EIGEN3_INCLUDE_DIR) AND Eigen_SOURCE_DIR) + # unless EIGEN3_INCLUDE_DIR is defined, let's use current Eigen version + set(EIGEN3_INCLUDE_DIR ${Eigen_SOURCE_DIR}) + set(EIGEN3_FOUND TRUE) +else() + find_package(Eigen3) +endif() + +if (EIGEN3_FOUND) + + include_directories(${EIGEN3_INCLUDE_DIR}) + btl_add_bench(btl_eigen3_linear main_linear.cpp) + btl_add_bench(btl_eigen3_vecmat main_vecmat.cpp) + btl_add_bench(btl_eigen3_matmat main_matmat.cpp) + btl_add_bench(btl_eigen3_adv main_adv.cpp ) + + btl_add_target_property(btl_eigen3_linear COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=eigen3") + btl_add_target_property(btl_eigen3_vecmat COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=eigen3") + btl_add_target_property(btl_eigen3_matmat COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=eigen3") + btl_add_target_property(btl_eigen3_adv COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=eigen3") + + option(BTL_BENCH_NOGCCVEC "also bench Eigen explicit vec without GCC's auto vec" OFF) + if(CMAKE_COMPILER_IS_GNUCXX AND BTL_BENCH_NOGCCVEC) + btl_add_bench(btl_eigen3_nogccvec_linear main_linear.cpp) + btl_add_bench(btl_eigen3_nogccvec_vecmat main_vecmat.cpp) + btl_add_bench(btl_eigen3_nogccvec_matmat main_matmat.cpp) + btl_add_bench(btl_eigen3_nogccvec_adv main_adv.cpp ) + + btl_add_target_property(btl_eigen3_nogccvec_linear COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize 
-DBTL_PREFIX=eigen3_nogccvec") + btl_add_target_property(btl_eigen3_nogccvec_vecmat COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=eigen3_nogccvec") + btl_add_target_property(btl_eigen3_nogccvec_matmat COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=eigen3_nogccvec") + btl_add_target_property(btl_eigen3_nogccvec_adv COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=eigen3_nogccvec") + endif() + + + if(NOT BTL_NOVEC) + btl_add_bench(btl_eigen3_novec_linear main_linear.cpp OFF) + btl_add_bench(btl_eigen3_novec_vecmat main_vecmat.cpp OFF) + btl_add_bench(btl_eigen3_novec_matmat main_matmat.cpp OFF) + btl_add_bench(btl_eigen3_novec_adv main_adv.cpp OFF) + btl_add_target_property(btl_eigen3_novec_linear COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=eigen3_novec") + btl_add_target_property(btl_eigen3_novec_vecmat COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=eigen3_novec") + btl_add_target_property(btl_eigen3_novec_matmat COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=eigen3_novec") + btl_add_target_property(btl_eigen3_novec_adv COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=eigen3_novec") + +# if(BUILD_btl_eigen3_adv) +# target_link_libraries(btl_eigen3_adv ${MKL_LIBRARIES}) +# endif(BUILD_btl_eigen3_adv) + + endif(NOT BTL_NOVEC) + + btl_add_bench(btl_tiny_eigen3 btl_tiny_eigen3.cpp OFF) + + if(NOT BTL_NOVEC) + btl_add_bench(btl_tiny_eigen3_novec btl_tiny_eigen3.cpp OFF) + btl_add_target_property(btl_tiny_eigen3_novec COMPILE_FLAGS "-DBTL_PREFIX=eigen3_tiny") + + if(BUILD_btl_tiny_eigen3_novec) + btl_add_target_property(btl_tiny_eigen3_novec COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=eigen3_tiny_novec") + endif(BUILD_btl_tiny_eigen3_novec) + endif(NOT BTL_NOVEC) + +endif (EIGEN3_FOUND) diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/btl_tiny_eigen3.cpp b/thirdparty/eigen/bench/btl/libs/eigen3/btl_tiny_eigen3.cpp new file mode 
100644 index 000000000..d1515be84 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/btl_tiny_eigen3.cpp @@ -0,0 +1,46 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen3_interface.hh" +#include "static/bench_static.hh" +#include "action_matrix_vector_product.hh" +#include "action_matrix_matrix_product.hh" +#include "action_axpy.hh" +#include "action_lu_solve.hh" +#include "action_ata_product.hh" +#include "action_aat_product.hh" +#include "action_atv_product.hh" +#include "action_cholesky.hh" +#include "action_trisolve.hh" + +BTL_MAIN; + +int main() +{ + + bench_static(); + bench_static(); + bench_static(); + bench_static(); + bench_static(); + bench_static(); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/eigen3_interface.hh b/thirdparty/eigen/bench/btl/libs/eigen3/eigen3_interface.hh new file mode 100644 index 000000000..b821fd721 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/eigen3_interface.hh @@ -0,0 +1,240 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef EIGEN3_INTERFACE_HH +#define EIGEN3_INTERFACE_HH + +#include +#include +#include "btl.hh" + +using namespace Eigen; + +template +class eigen3_interface +{ + +public : + + enum {IsFixedSize = (SIZE!=Dynamic)}; + + typedef real real_type; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef Eigen::Matrix gene_matrix; + typedef Eigen::Matrix gene_vector; + + static inline std::string name( void ) + { + return EIGEN_MAKESTRING(BTL_PREFIX); + } + + static void free_matrix(gene_matrix & /*A*/, int /*N*/) {} + + static void free_vector(gene_vector & /*B*/) {} + + static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl[0].size(), A_stl.size()); + + for (unsigned int j=0; j().setZero(); + X.template selfadjointView().rankUpdate(A); + } + + static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int /*N*/){ + X.noalias() = A*B; + } + + static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int /*N*/){ + X.noalias() = (A.template selfadjointView() * B); +// internal::product_selfadjoint_vector(N,A.data(),N, B.data(), 1, X.data(), 1); + } + + template static void triassign(Dest& dst, const Src& src) + { + typedef typename Dest::Scalar Scalar; + typedef typename internal::packet_traits::type Packet; + const int PacketSize = sizeof(Packet)/sizeof(Scalar); + int size = dst.cols(); + for(int j=0; j(j, index, src); + else + dst.template copyPacket(index, j, src); + } + + // do the non-vectorizable part of the assignment + for (int index = alignedEnd; index(N,A.data(),N, X.data(), 1, Y.data(), 1, -1); + for(int j=0; j(c,s)); + } + + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int /*N*/){ + X.noalias() = (A.transpose()*B); + } + + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int /*N*/){ + Y += coef * X; + } + + static inline void axpby(real a, const 
gene_vector & X, real b, gene_vector & Y, int /*N*/){ + Y = a*X + b*Y; + } + + static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix & source, gene_matrix & cible, int /*N*/){ + cible = source; + } + + static EIGEN_DONT_INLINE void copy_vector(const gene_vector & source, gene_vector & cible, int /*N*/){ + cible = source; + } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int /*N*/){ + X = L.template triangularView().solve(B); + } + + static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int /*N*/){ + X = L.template triangularView().solve(B); + } + + static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int /*N*/){ + X.noalias() = L.template triangularView() * B; + } + + static inline void cholesky(const gene_matrix & X, gene_matrix & C, int /*N*/){ + C = X; + internal::llt_inplace::blocked(C); + //C = X.llt().matrixL(); +// C = X; +// Cholesky::computeInPlace(C); +// Cholesky::computeInPlaceBlock(C); + } + + static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int /*N*/){ + C = X.fullPivLu().matrixLU(); + } + + static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + Matrix piv(N); + DenseIndex nb; + C = X; + internal::partial_lu_inplace(C,piv,nb); +// C = X.partialPivLu().matrixLU(); + } + + static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ + typename Tridiagonalization::CoeffVectorType aux(N-1); + C = X; + internal::tridiagonalization_inplace(C, aux); + } + + static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int /*N*/){ + C = HessenbergDecomposition(X).packedMatrix(); + } + + + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/main_adv.cpp b/thirdparty/eigen/bench/btl/libs/eigen3/main_adv.cpp new file mode 100644 index 000000000..95865357e --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/main_adv.cpp @@ 
-0,0 +1,44 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "eigen3_interface.hh" +#include "bench.hh" +#include "action_trisolve.hh" +#include "action_trisolve_matrix.hh" +#include "action_cholesky.hh" +#include "action_hessenberg.hh" +#include "action_lu_decomp.hh" +#include "action_partial_lu.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); +// bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + +// bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/main_linear.cpp b/thirdparty/eigen/bench/btl/libs/eigen3/main_linear.cpp new file mode 100644 index 000000000..e8538b7d0 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/main_linear.cpp @@ -0,0 +1,35 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the 
terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "eigen3_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/main_matmat.cpp b/thirdparty/eigen/bench/btl/libs/eigen3/main_matmat.cpp new file mode 100644 index 000000000..926fa2b01 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/main_matmat.cpp @@ -0,0 +1,35 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "eigen3_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/eigen3/main_vecmat.cpp b/thirdparty/eigen/bench/btl/libs/eigen3/main_vecmat.cpp new file mode 100644 index 000000000..0dda444cf --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/eigen3/main_vecmat.cpp @@ -0,0 +1,36 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "eigen3_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/gmm/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/gmm/CMakeLists.txt new file mode 100644 index 000000000..bc2586243 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/gmm/CMakeLists.txt @@ -0,0 +1,6 @@ + +find_package(GMM) +if (GMM_FOUND) + include_directories(${GMM_INCLUDES}) + btl_add_bench(btl_gmm main.cpp) +endif (GMM_FOUND) diff --git a/thirdparty/eigen/bench/btl/libs/gmm/gmm_LU_solve_interface.hh b/thirdparty/eigen/bench/btl/libs/gmm/gmm_LU_solve_interface.hh new file mode 100644 index 000000000..dcb9f567f --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/gmm/gmm_LU_solve_interface.hh @@ -0,0 +1,192 @@ +//===================================================== +// File : blitz_LU_solve_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:31 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef BLITZ_LU_SOLVE_INTERFACE_HH +#define BLITZ_LU_SOLVE_INTERFACE_HH + +#include "blitz/array.h" +#include + +BZ_USING_NAMESPACE(blitz) + +template +class blitz_LU_solve_interface : public blitz_interface +{ + +public : + + typedef typename blitz_interface::gene_matrix gene_matrix; + typedef typename blitz_interface::gene_vector gene_vector; + + typedef blitz::Array Pivot_Vector; + + inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N) + { + + pivot.resize(N); + + } + + inline static void free_Pivot_Vector(Pivot_Vector & pivot) + { + + return; + + } + + + static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end) + { + + real somme=0.; + + for (int j=col_start ; j=big ) big = abs( LU( i, j ) ) ; + } + if( big==0. ) { + INFOS( "blitz_LU_factor::Singular matrix" ) ; + exit( 0 ) ; + } + ImplicitScaling( i ) = 1./big ; + } + // Loop over columns of Crout's method : + for( int j=0; j=big ) { + dum = ImplicitScaling( i )*abs( theSum ) ; + big = dum ; + index_max = i ; + } + } + // Interchanging rows and the scale factor : + if( j!=index_max ) { + for( int k=0; k=0; i-- ) { + theSum = X( i ) ; + // theSum = B( i ) ; + theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ; + // theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ; + // theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ; + // Store a component of the solution vector : + X( i ) = theSum/LU( i, i ) ; + // B( i ) = theSum/LU( i, i ) ; + } + + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/gmm/gmm_interface.hh b/thirdparty/eigen/bench/btl/libs/gmm/gmm_interface.hh new file mode 100644 index 000000000..3ea303c1b --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/gmm/gmm_interface.hh @@ -0,0 +1,144 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This 
program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef GMM_INTERFACE_HH +#define GMM_INTERFACE_HH + +#include +#include + +using namespace gmm; + +template +class gmm_interface { + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef gmm::dense_matrix gene_matrix; + typedef stl_vector gene_vector; + + static inline std::string name( void ) + { + return "gmm"; + } + + static void free_matrix(gene_matrix & A, int N){ + return ; + } + + static void free_vector(gene_vector & B){ + return ; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl[0].size(),A_stl.size()); + + for (int j=0; j ipvt(N); + gmm::lu_factor(R, ipvt); + } + + static inline void hessenberg(const gene_matrix & X, gene_matrix & R, int N){ + gmm::copy(X,R); + gmm::Hessenberg_reduction(R,X,false); + } + + static inline void tridiagonalization(const gene_matrix & X, gene_matrix & R, int N){ + gmm::copy(X,R); + gmm::Householder_tridiagonalization(R,X,false); + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/gmm/main.cpp b/thirdparty/eigen/bench/btl/libs/gmm/main.cpp new file mode 100644 index 000000000..1f0c051eb --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/gmm/main.cpp @@ -0,0 +1,51 @@ 
+//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "gmm_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" +#include "action_hessenberg.hh" +#include "action_partial_lu.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + //bench > >(MIN_LU,MAX_LU,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/mtl4/.kdbgrc.main b/thirdparty/eigen/bench/btl/libs/mtl4/.kdbgrc.main new file mode 100644 index 000000000..fed082f7f --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/mtl4/.kdbgrc.main @@ -0,0 +1,12 @@ +[General] +DebuggerCmdStr= +DriverName=GDB +FileVersion=1 +OptionsSelected= +ProgramArgs= +TTYLevel=7 +WorkingDirectory= + +[Memory] +ColumnWidths=80,0 +NumExprs=0 diff --git 
a/thirdparty/eigen/bench/btl/libs/mtl4/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/mtl4/CMakeLists.txt new file mode 100644 index 000000000..14b47a808 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/mtl4/CMakeLists.txt @@ -0,0 +1,6 @@ + +find_package(MTL4) +if (MTL4_FOUND) + include_directories(${MTL4_INCLUDE_DIR}) + btl_add_bench(btl_mtl4 main.cpp) +endif (MTL4_FOUND) diff --git a/thirdparty/eigen/bench/btl/libs/mtl4/main.cpp b/thirdparty/eigen/bench/btl/libs/mtl4/main.cpp new file mode 100644 index 000000000..96fcfb9c9 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/mtl4/main.cpp @@ -0,0 +1,46 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "mtl4_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" +#include "action_cholesky.hh" +// #include "action_lu_decomp.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/mtl4/mtl4_LU_solve_interface.hh b/thirdparty/eigen/bench/btl/libs/mtl4/mtl4_LU_solve_interface.hh new file mode 100644 index 000000000..dcb9f567f --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/mtl4/mtl4_LU_solve_interface.hh @@ -0,0 +1,192 @@ +//===================================================== +// File : blitz_LU_solve_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:31 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef BLITZ_LU_SOLVE_INTERFACE_HH +#define BLITZ_LU_SOLVE_INTERFACE_HH + +#include "blitz/array.h" +#include + +BZ_USING_NAMESPACE(blitz) + +template +class blitz_LU_solve_interface : public blitz_interface +{ + +public : + + typedef typename blitz_interface::gene_matrix gene_matrix; + typedef typename blitz_interface::gene_vector gene_vector; + + typedef blitz::Array Pivot_Vector; + + inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N) + { + + pivot.resize(N); + + } + + inline static void free_Pivot_Vector(Pivot_Vector & pivot) + { + + return; + + } + + + static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end) + { + + real somme=0.; + + for (int j=col_start ; j=big ) big = abs( LU( i, j ) ) ; + } + if( big==0. ) { + INFOS( "blitz_LU_factor::Singular matrix" ) ; + exit( 0 ) ; + } + ImplicitScaling( i ) = 1./big ; + } + // Loop over columns of Crout's method : + for( int j=0; j=big ) { + dum = ImplicitScaling( i )*abs( theSum ) ; + big = dum ; + index_max = i ; + } + } + // Interchanging rows and the scale factor : + if( j!=index_max ) { + for( int k=0; k=0; i-- ) { + theSum = X( i ) ; + // theSum = B( i ) ; + theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ; + // theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ; + // theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ; + // Store a component of the solution vector : + X( i ) = theSum/LU( i, i ) ; + // B( i ) = theSum/LU( i, i ) ; + } + + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/mtl4/mtl4_interface.hh b/thirdparty/eigen/bench/btl/libs/mtl4/mtl4_interface.hh new file mode 100644 index 000000000..3795ac61e --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/mtl4/mtl4_interface.hh @@ -0,0 +1,144 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This 
program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef MTL4_INTERFACE_HH +#define MTL4_INTERFACE_HH + +#include +#include +// #include +#include + +using namespace mtl; + +template +class mtl4_interface { + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef mtl::dense2D > gene_matrix; + typedef mtl::dense_vector gene_vector; + + static inline std::string name() { return "mtl4"; } + + static void free_matrix(gene_matrix & A, int N){ + return ; + } + + static void free_vector(gene_vector & B){ + return ; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.change_dim(A_stl[0].size(), A_stl.size()); + + for (int j=0; j C(N,N); +// C = B; +// X = (A*C); + } + + static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){ + X = (trans(A)*trans(B)); + } + +// static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){ +// X = (trans(A)*A); +// } + + static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){ + X = (A*trans(A)); + } + + static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + X = (A*B); + } + + static inline void atv_product(gene_matrix & A, gene_vector & 
B, gene_vector & X, int N){ + X = (trans(A)*B); + } + + static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){ + Y += coef * X; + } + + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + Y = a*X + b*Y; + } + +// static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ +// C = X; +// recursive_cholesky(C); +// } + +// static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){ +// R = X; +// std::vector ipvt(N); +// lu_factor(R, ipvt); +// } + + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ + X = lower_trisolve(L, B); + } + + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + cible = source; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + cible = source; + } + +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/tensors/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/tensors/CMakeLists.txt new file mode 100644 index 000000000..09d6d8e43 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tensors/CMakeLists.txt @@ -0,0 +1,44 @@ + + +if((NOT TENSOR_INCLUDE_DIR) AND Eigen_SOURCE_DIR) + # unless TENSOR_INCLUDE_DIR is defined, let's use current Eigen version + set(TENSOR_INCLUDE_DIR ${Eigen_SOURCE_DIR}) + set(TENSOR_FOUND TRUE) +else() + find_package(Tensor) +endif() + +if (TENSOR_FOUND) + + include_directories(${TENSOR_INCLUDE_DIR}) + btl_add_bench(btl_tensor_linear main_linear.cpp) + btl_add_bench(btl_tensor_vecmat main_vecmat.cpp) + btl_add_bench(btl_tensor_matmat main_matmat.cpp) + + btl_add_target_property(btl_tensor_linear COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=tensor") + btl_add_target_property(btl_tensor_vecmat COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=tensor") + btl_add_target_property(btl_tensor_matmat COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=tensor") + + option(BTL_BENCH_NOGCCVEC "also bench 
Eigen explicit vec without GCC's auto vec" OFF) + if(CMAKE_COMPILER_IS_GNUCXX AND BTL_BENCH_NOGCCVEC) + btl_add_bench(btl_tensor_nogccvec_linear main_linear.cpp) + btl_add_bench(btl_tensor_nogccvec_vecmat main_vecmat.cpp) + btl_add_bench(btl_tensor_nogccvec_matmat main_matmat.cpp) + + btl_add_target_property(btl_tensor_nogccvec_linear COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=tensor_nogccvec") + btl_add_target_property(btl_tensor_nogccvec_vecmat COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=tensor_nogccvec") + btl_add_target_property(btl_tensor_nogccvec_matmat COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=tensor_nogccvec") + endif() + + + if(NOT BTL_NOVEC) + btl_add_bench(btl_tensor_novec_linear main_linear.cpp OFF) + btl_add_bench(btl_tensor_novec_vecmat main_vecmat.cpp OFF) + btl_add_bench(btl_tensor_novec_matmat main_matmat.cpp OFF) + btl_add_target_property(btl_tensor_novec_linear COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=tensor_novec") + btl_add_target_property(btl_tensor_novec_vecmat COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=tensor_novec") + btl_add_target_property(btl_tensor_novec_matmat COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=tensor_novec") + + endif(NOT BTL_NOVEC) + +endif (TENSOR_FOUND) diff --git a/thirdparty/eigen/bench/btl/libs/tensors/main_linear.cpp b/thirdparty/eigen/bench/btl/libs/tensors/main_linear.cpp new file mode 100644 index 000000000..e257f1e72 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tensors/main_linear.cpp @@ -0,0 +1,23 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "utilities.h" +#include "tensor_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + return 0; +} diff --git a/thirdparty/eigen/bench/btl/libs/tensors/main_matmat.cpp b/thirdparty/eigen/bench/btl/libs/tensors/main_matmat.cpp new file mode 100644 index 000000000..675fcfc6d --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tensors/main_matmat.cpp @@ -0,0 +1,21 @@ +//===================================================== +// Copyright (C) 2014 Benoit Steiner +//===================================================== +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +#include "utilities.h" +#include "tensor_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} diff --git a/thirdparty/eigen/bench/btl/libs/tensors/main_vecmat.cpp b/thirdparty/eigen/bench/btl/libs/tensors/main_vecmat.cpp new file mode 100644 index 000000000..1af00c81b --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tensors/main_vecmat.cpp @@ -0,0 +1,21 @@ +//===================================================== +// Copyright (C) 2014 Benoit Steiner +//===================================================== +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+// +#include "utilities.h" +#include "tensor_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MV,MAX_MV,NB_POINT); + + return 0; +} diff --git a/thirdparty/eigen/bench/btl/libs/tensors/tensor_interface.hh b/thirdparty/eigen/bench/btl/libs/tensors/tensor_interface.hh new file mode 100644 index 000000000..97b8e0f0b --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tensors/tensor_interface.hh @@ -0,0 +1,105 @@ +//===================================================== +// Copyright (C) 2014 Benoit Steiner +//===================================================== +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +#ifndef TENSOR_INTERFACE_HH +#define TENSOR_INTERFACE_HH + +#include +#include +#include "btl.hh" + +using namespace Eigen; + +template +class tensor_interface +{ +public : + typedef real real_type; + typedef typename Eigen::Tensor::Index Index; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef Eigen::Tensor gene_matrix; + typedef Eigen::Tensor gene_vector; + + + static inline std::string name( void ) + { + return EIGEN_MAKESTRING(BTL_PREFIX); + } + + static void free_matrix(gene_matrix & /*A*/, int /*N*/) {} + + static void free_vector(gene_vector & /*B*/) {} + + static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(Eigen::array(A_stl[0].size(), A_stl.size())); + + for (unsigned int j=0; j(i,j)) = A_stl[j][i]; + } + } + } + + static BTL_DONT_INLINE void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + B.resize(B_stl.size()); + + for (unsigned int i=0; i(i,j)); + } + } + } + + static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int /*N*/){ + typedef typename Eigen::Tensor::DimensionPair DimPair; + const 
Eigen::array dims(DimPair(1, 0)); + X/*.noalias()*/ = A.contract(B, dims); + } + + static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int /*N*/){ + typedef typename Eigen::Tensor::DimensionPair DimPair; + const Eigen::array dims(DimPair(1, 0)); + X/*.noalias()*/ = A.contract(B, dims); + } + + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int /*N*/){ + Y += X.constant(coef) * X; + } + + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int /*N*/){ + Y = X.constant(a)*X + Y.constant(b)*Y; + } + + static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix & source, gene_matrix & cible, int /*N*/){ + cible = source; + } + + static EIGEN_DONT_INLINE void copy_vector(const gene_vector & source, gene_vector & cible, int /*N*/){ + cible = source; + } +}; + +#endif diff --git a/thirdparty/eigen/bench/btl/libs/tvmet/CMakeLists.txt b/thirdparty/eigen/bench/btl/libs/tvmet/CMakeLists.txt new file mode 100644 index 000000000..25b565b97 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tvmet/CMakeLists.txt @@ -0,0 +1,6 @@ + +find_package(Tvmet) +if (TVMET_FOUND) + include_directories(${TVMET_INCLUDE_DIR}) + btl_add_bench(btl_tvmet main.cpp OFF) +endif (TVMET_FOUND) diff --git a/thirdparty/eigen/bench/btl/libs/tvmet/main.cpp b/thirdparty/eigen/bench/btl/libs/tvmet/main.cpp new file mode 100644 index 000000000..633215c43 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tvmet/main.cpp @@ -0,0 +1,40 @@ +//===================================================== +// File : main.cpp +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "tvmet_interface.hh" +#include "static/bench_static.hh" +#include "action_matrix_vector_product.hh" +#include "action_matrix_matrix_product.hh" +#include "action_atv_product.hh" +#include "action_axpy.hh" + +BTL_MAIN; + +int main() +{ + bench_static(); + bench_static(); + bench_static(); + bench_static(); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/tvmet/tvmet_interface.hh b/thirdparty/eigen/bench/btl/libs/tvmet/tvmet_interface.hh new file mode 100644 index 000000000..b441ada21 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/tvmet/tvmet_interface.hh @@ -0,0 +1,104 @@ +//===================================================== +// File : tvmet_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef TVMET_INTERFACE_HH +#define TVMET_INTERFACE_HH + +#include +#include +#include + +#include + +using namespace tvmet; + +template +class tvmet_interface{ + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef Vector gene_vector; + typedef Matrix gene_matrix; + + static inline std::string name() { return "tiny_tvmet"; } + + static void free_matrix(gene_matrix & A, int N){} + + static void free_vector(gene_vector & B){} + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + for (int j=0; j +// Copyright (C) EDF R&D, lun sep 30 14:23:27 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#include "utilities.h" +#include "ublas_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/thirdparty/eigen/bench/btl/libs/ublas/ublas_interface.hh b/thirdparty/eigen/bench/btl/libs/ublas/ublas_interface.hh new file mode 100644 index 000000000..95cad5195 --- /dev/null +++ b/thirdparty/eigen/bench/btl/libs/ublas/ublas_interface.hh @@ -0,0 +1,141 @@ +//===================================================== +// File : ublas_interface.hh +// Author : L. Plagne +// Copyright (C) EDF R&D, lun sep 30 14:23:27 CEST 2002 +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// +#ifndef UBLAS_INTERFACE_HH +#define UBLAS_INTERFACE_HH + +#include +#include +#include +#include + +using namespace boost::numeric; + +template +class ublas_interface{ + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef typename boost::numeric::ublas::matrix gene_matrix; + typedef typename boost::numeric::ublas::vector gene_vector; + + static inline std::string name( void ) { return "ublas"; } + + static void free_matrix(gene_matrix & A, int N) {} + + static void free_vector(gene_vector & B) {} + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl.size(),A_stl[0].size()); + for (int j=0; j +#include "../Eigen/Core" + +using namespace Eigen; +using namespace std; + +#define DUMP_CPUID(CODE) {\ + int abcd[4]; \ + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;\ + EIGEN_CPUID(abcd, CODE, 0); \ + std::cout << "The code " << CODE << " gives " \ + << (int*)(abcd[0]) << " " << (int*)(abcd[1]) << " " \ + << (int*)(abcd[2]) << " " << (int*)(abcd[3]) << " " << std::endl; \ + } + +int main() +{ + cout << "Eigen's L1 = " << internal::queryL1CacheSize() << endl; + cout << "Eigen's L2/L3 = " << internal::queryTopLevelCacheSize() << endl; + int l1, l2, l3; + internal::queryCacheSizes(l1, l2, l3); + cout << "Eigen's L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl; + + #ifdef EIGEN_CPUID + + int abcd[4]; + int string[8]; + char* string_char = (char*)(string); + + // vendor ID + EIGEN_CPUID(abcd,0x0,0); + string[0] = abcd[1]; + string[1] = abcd[3]; + string[2] = abcd[2]; + string[3] = 0; + cout << endl; + cout << "vendor id = " << string_char << endl; + cout << endl; + int max_funcs = abcd[0]; + + internal::queryCacheSizes_intel_codes(l1, l2, l3); + cout << "Eigen's intel codes L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl; + if(max_funcs>=4) + { + internal::queryCacheSizes_intel_direct(l1, l2, l3); + cout << "Eigen's intel direct L1, L2, L3 = " << l1 << " " 
<< l2 << " " << l3 << endl; + } + internal::queryCacheSizes_amd(l1, l2, l3); + cout << "Eigen's amd L1, L2, L3 = " << l1 << " " << l2 << " " << l3 << endl; + cout << endl; + + // dump Intel direct method + if(max_funcs>=4) + { + l1 = l2 = l3 = 0; + int cache_id = 0; + int cache_type = 0; + do { + abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0; + EIGEN_CPUID(abcd,0x4,cache_id); + cache_type = (abcd[0] & 0x0F) >> 0; + int cache_level = (abcd[0] & 0xE0) >> 5; // A[7:5] + int ways = (abcd[1] & 0xFFC00000) >> 22; // B[31:22] + int partitions = (abcd[1] & 0x003FF000) >> 12; // B[21:12] + int line_size = (abcd[1] & 0x00000FFF) >> 0; // B[11:0] + int sets = (abcd[2]); // C[31:0] + int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1); + + cout << "cache[" << cache_id << "].type = " << cache_type << "\n"; + cout << "cache[" << cache_id << "].level = " << cache_level << "\n"; + cout << "cache[" << cache_id << "].ways = " << ways << "\n"; + cout << "cache[" << cache_id << "].partitions = " << partitions << "\n"; + cout << "cache[" << cache_id << "].line_size = " << line_size << "\n"; + cout << "cache[" << cache_id << "].sets = " << sets << "\n"; + cout << "cache[" << cache_id << "].size = " << cache_size << "\n"; + + cache_id++; + } while(cache_type>0 && cache_id<16); + } + + // dump everything + std::cout << endl <<"Raw dump:" << endl; + for(int i=0; i +#include "BenchTimer.h" +#include +#include +#include +#include +#include +using namespace Eigen; + +std::map > results; +std::vector labels; +std::vector sizes; + +template +EIGEN_DONT_INLINE +void compute_norm_equation(Solver &solver, const MatrixType &A) { + if(A.rows()!=A.cols()) + solver.compute(A.transpose()*A); + else + solver.compute(A); +} + +template +EIGEN_DONT_INLINE +void compute(Solver &solver, const MatrixType &A) { + solver.compute(A); +} + +template +void bench(int id, int rows, int size = Size) +{ + typedef Matrix Mat; + typedef Matrix MatDyn; + typedef Matrix MatSquare; + Mat A(rows,size); + 
A.setRandom(); + if(rows==size) + A = A*A.adjoint(); + BenchTimer t_llt, t_ldlt, t_lu, t_fplu, t_qr, t_cpqr, t_cod, t_fpqr, t_jsvd, t_bdcsvd; + + int svd_opt = ComputeThinU|ComputeThinV; + + int tries = 5; + int rep = 1000/size; + if(rep==0) rep = 1; +// rep = rep*rep; + + LLT llt(size); + LDLT ldlt(size); + PartialPivLU lu(size); + FullPivLU fplu(size,size); + HouseholderQR qr(A.rows(),A.cols()); + ColPivHouseholderQR cpqr(A.rows(),A.cols()); + CompleteOrthogonalDecomposition cod(A.rows(),A.cols()); + FullPivHouseholderQR fpqr(A.rows(),A.cols()); + JacobiSVD jsvd(A.rows(),A.cols()); + BDCSVD bdcsvd(A.rows(),A.cols()); + + BENCH(t_llt, tries, rep, compute_norm_equation(llt,A)); + BENCH(t_ldlt, tries, rep, compute_norm_equation(ldlt,A)); + BENCH(t_lu, tries, rep, compute_norm_equation(lu,A)); + if(size<=1000) + BENCH(t_fplu, tries, rep, compute_norm_equation(fplu,A)); + BENCH(t_qr, tries, rep, compute(qr,A)); + BENCH(t_cpqr, tries, rep, compute(cpqr,A)); + BENCH(t_cod, tries, rep, compute(cod,A)); + if(size*rows<=10000000) + BENCH(t_fpqr, tries, rep, compute(fpqr,A)); + if(size<500) // JacobiSVD is really too slow for too large matrices + BENCH(t_jsvd, tries, rep, jsvd.compute(A,svd_opt)); +// if(size*rows<=20000000) + BENCH(t_bdcsvd, tries, rep, bdcsvd.compute(A,svd_opt)); + + results["LLT"][id] = t_llt.best(); + results["LDLT"][id] = t_ldlt.best(); + results["PartialPivLU"][id] = t_lu.best(); + results["FullPivLU"][id] = t_fplu.best(); + results["HouseholderQR"][id] = t_qr.best(); + results["ColPivHouseholderQR"][id] = t_cpqr.best(); + results["CompleteOrthogonalDecomposition"][id] = t_cod.best(); + results["FullPivHouseholderQR"][id] = t_fpqr.best(); + results["JacobiSVD"][id] = t_jsvd.best(); + results["BDCSVD"][id] = t_bdcsvd.best(); +} + + +int main() +{ + labels.push_back("LLT"); + labels.push_back("LDLT"); + labels.push_back("PartialPivLU"); + labels.push_back("FullPivLU"); + labels.push_back("HouseholderQR"); + labels.push_back("ColPivHouseholderQR"); + 
labels.push_back("CompleteOrthogonalDecomposition"); + labels.push_back("FullPivHouseholderQR"); + labels.push_back("JacobiSVD"); + labels.push_back("BDCSVD"); + + for(int i=0; i(k,sizes[k](0),sizes[k](1)); + } + + cout.width(32); + cout << "solver/size"; + cout << " "; + for(int k=0; k=1e6) cout << "-"; + else cout << r(k); + cout << " "; + } + cout << endl; + } + + // HTML output + cout << "" << endl; + cout << "" << endl; + for(int k=0; k" << sizes[k](0) << "x" << sizes[k](1) << ""; + cout << "" << endl; + for(int i=0; i"; + ArrayXf r = (results[labels[i]]*100000.f).floor()/100.f; + for(int k=0; k=1e6) cout << ""; + else + { + cout << ""; + } + } + cout << "" << endl; + } + cout << "
solver/size
" << labels[i] << "-" << r(k); + if(i>0) + cout << " (x" << numext::round(10.f*results[labels[i]](k)/results["LLT"](k))/10.f << ")"; + if(i<4 && sizes[k](0)!=sizes[k](1)) + cout << " *"; + cout << "
" << endl; + +// cout << "LLT (ms) " << (results["LLT"]*1000.).format(fmt) << "\n"; +// cout << "LDLT (%) " << (results["LDLT"]/results["LLT"]).format(fmt) << "\n"; +// cout << "PartialPivLU (%) " << (results["PartialPivLU"]/results["LLT"]).format(fmt) << "\n"; +// cout << "FullPivLU (%) " << (results["FullPivLU"]/results["LLT"]).format(fmt) << "\n"; +// cout << "HouseholderQR (%) " << (results["HouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "ColPivHouseholderQR (%) " << (results["ColPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "CompleteOrthogonalDecomposition (%) " << (results["CompleteOrthogonalDecomposition"]/results["LLT"]).format(fmt) << "\n"; +// cout << "FullPivHouseholderQR (%) " << (results["FullPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "JacobiSVD (%) " << (results["JacobiSVD"]/results["LLT"]).format(fmt) << "\n"; +// cout << "BDCSVD (%) " << (results["BDCSVD"]/results["LLT"]).format(fmt) << "\n"; +} diff --git a/thirdparty/eigen/bench/eig33.cpp b/thirdparty/eigen/bench/eig33.cpp new file mode 100644 index 000000000..47947a9be --- /dev/null +++ b/thirdparty/eigen/bench/eig33.cpp @@ -0,0 +1,195 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// The computeRoots function included in this is based on materials +// covered by the following copyright and license: +// +// Geometric Tools, LLC +// Copyright (c) 1998-2010 +// Distributed under the Boost Software License, Version 1.0. 
+// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +#include +#include +#include +#include +#include + +using namespace Eigen; +using namespace std; + +template +inline void computeRoots(const Matrix& m, Roots& roots) +{ + typedef typename Matrix::Scalar Scalar; + const Scalar s_inv3 = 1.0/3.0; + const Scalar s_sqrt3 = std::sqrt(Scalar(3.0)); + + // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0. The + // eigenvalues are the roots to this equation, all guaranteed to be + // real-valued, because the matrix is symmetric. 
+ Scalar c0 = m(0,0)*m(1,1)*m(2,2) + Scalar(2)*m(0,1)*m(0,2)*m(1,2) - m(0,0)*m(1,2)*m(1,2) - m(1,1)*m(0,2)*m(0,2) - m(2,2)*m(0,1)*m(0,1); + Scalar c1 = m(0,0)*m(1,1) - m(0,1)*m(0,1) + m(0,0)*m(2,2) - m(0,2)*m(0,2) + m(1,1)*m(2,2) - m(1,2)*m(1,2); + Scalar c2 = m(0,0) + m(1,1) + m(2,2); + + // Construct the parameters used in classifying the roots of the equation + // and in solving the equation for the roots in closed form. + Scalar c2_over_3 = c2*s_inv3; + Scalar a_over_3 = (c1 - c2*c2_over_3)*s_inv3; + if (a_over_3 > Scalar(0)) + a_over_3 = Scalar(0); + + Scalar half_b = Scalar(0.5)*(c0 + c2_over_3*(Scalar(2)*c2_over_3*c2_over_3 - c1)); + + Scalar q = half_b*half_b + a_over_3*a_over_3*a_over_3; + if (q > Scalar(0)) + q = Scalar(0); + + // Compute the eigenvalues by solving for the roots of the polynomial. + Scalar rho = std::sqrt(-a_over_3); + Scalar theta = std::atan2(std::sqrt(-q),half_b)*s_inv3; + Scalar cos_theta = std::cos(theta); + Scalar sin_theta = std::sin(theta); + roots(2) = c2_over_3 + Scalar(2)*rho*cos_theta; + roots(0) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta); + roots(1) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta); +} + +template +void eigen33(const Matrix& mat, Matrix& evecs, Vector& evals) +{ + typedef typename Matrix::Scalar Scalar; + // Scale the matrix so its entries are in [-1,1]. The scaling is applied + // only when at least one matrix entry has magnitude larger than 1. + + Scalar shift = mat.trace()/3; + Matrix scaledMat = mat; + scaledMat.diagonal().array() -= shift; + Scalar scale = scaledMat.cwiseAbs()/*.template triangularView()*/.maxCoeff(); + scale = std::max(scale,Scalar(1)); + scaledMat/=scale; + + // Compute the eigenvalues +// scaledMat.setZero(); + computeRoots(scaledMat,evals); + + // compute the eigen vectors + // **here we assume 3 differents eigenvalues** + + // "optimized version" which appears to be slower with gcc! 
+// Vector base; +// Scalar alpha, beta; +// base << scaledMat(1,0) * scaledMat(2,1), +// scaledMat(1,0) * scaledMat(2,0), +// -scaledMat(1,0) * scaledMat(1,0); +// for(int k=0; k<2; ++k) +// { +// alpha = scaledMat(0,0) - evals(k); +// beta = scaledMat(1,1) - evals(k); +// evecs.col(k) = (base + Vector(-beta*scaledMat(2,0), -alpha*scaledMat(2,1), alpha*beta)).normalized(); +// } +// evecs.col(2) = evecs.col(0).cross(evecs.col(1)).normalized(); + +// // naive version +// Matrix tmp; +// tmp = scaledMat; +// tmp.diagonal().array() -= evals(0); +// evecs.col(0) = tmp.row(0).cross(tmp.row(1)).normalized(); +// +// tmp = scaledMat; +// tmp.diagonal().array() -= evals(1); +// evecs.col(1) = tmp.row(0).cross(tmp.row(1)).normalized(); +// +// tmp = scaledMat; +// tmp.diagonal().array() -= evals(2); +// evecs.col(2) = tmp.row(0).cross(tmp.row(1)).normalized(); + + // a more stable version: + if((evals(2)-evals(0))<=Eigen::NumTraits::epsilon()) + { + evecs.setIdentity(); + } + else + { + Matrix tmp; + tmp = scaledMat; + tmp.diagonal ().array () -= evals (2); + evecs.col (2) = tmp.row (0).cross (tmp.row (1)).normalized (); + + tmp = scaledMat; + tmp.diagonal ().array () -= evals (1); + evecs.col(1) = tmp.row (0).cross(tmp.row (1)); + Scalar n1 = evecs.col(1).norm(); + if(n1<=Eigen::NumTraits::epsilon()) + evecs.col(1) = evecs.col(2).unitOrthogonal(); + else + evecs.col(1) /= n1; + + // make sure that evecs[1] is orthogonal to evecs[2] + evecs.col(1) = evecs.col(2).cross(evecs.col(1).cross(evecs.col(2))).normalized(); + evecs.col(0) = evecs.col(2).cross(evecs.col(1)); + } + + // Rescale back to the original size. 
+ evals *= scale; + evals.array()+=shift; +} + +int main() +{ + BenchTimer t; + int tries = 10; + int rep = 400000; + typedef Matrix3d Mat; + typedef Vector3d Vec; + Mat A = Mat::Random(3,3); + A = A.adjoint() * A; +// Mat Q = A.householderQr().householderQ(); +// A = Q * Vec(2.2424567,2.2424566,7.454353).asDiagonal() * Q.transpose(); + + SelfAdjointEigenSolver eig(A); + BENCH(t, tries, rep, eig.compute(A)); + std::cout << "Eigen iterative: " << t.best() << "s\n"; + + BENCH(t, tries, rep, eig.computeDirect(A)); + std::cout << "Eigen direct : " << t.best() << "s\n"; + + Mat evecs; + Vec evals; + BENCH(t, tries, rep, eigen33(A,evecs,evals)); + std::cout << "Direct: " << t.best() << "s\n\n"; + +// std::cerr << "Eigenvalue/eigenvector diffs:\n"; +// std::cerr << (evals - eig.eigenvalues()).transpose() << "\n"; +// for(int k=0;k<3;++k) +// if(evecs.col(k).dot(eig.eigenvectors().col(k))<0) +// evecs.col(k) = -evecs.col(k); +// std::cerr << evecs - eig.eigenvectors() << "\n\n"; +} diff --git a/thirdparty/eigen/bench/geometry.cpp b/thirdparty/eigen/bench/geometry.cpp new file mode 100644 index 000000000..b187a515f --- /dev/null +++ b/thirdparty/eigen/bench/geometry.cpp @@ -0,0 +1,126 @@ + +#include +#include +#include + +using namespace std; +using namespace Eigen; + +#ifndef SCALAR +#define SCALAR float +#endif + +#ifndef SIZE +#define SIZE 8 +#endif + +typedef SCALAR Scalar; +typedef NumTraits::Real RealScalar; +typedef Matrix A; +typedef Matrix B; +typedef Matrix C; +typedef Matrix M; + +template +EIGEN_DONT_INLINE void transform(const Transformation& t, Data& data) +{ + EIGEN_ASM_COMMENT("begin"); + data = t * data; + EIGEN_ASM_COMMENT("end"); +} + +template +EIGEN_DONT_INLINE void transform(const Quaternion& t, Data& data) +{ + EIGEN_ASM_COMMENT("begin quat"); + for(int i=0;i struct ToRotationMatrixWrapper +{ + enum {Dim = T::Dim}; + typedef typename T::Scalar Scalar; + ToRotationMatrixWrapper(const T& o) : object(o) {} + T object; +}; + +template +EIGEN_DONT_INLINE 
void transform(const ToRotationMatrixWrapper& t, Data& data) +{ + EIGEN_ASM_COMMENT("begin quat via mat"); + data = t.object.toRotationMatrix() * data; + EIGEN_ASM_COMMENT("end quat via mat"); +} + +template +EIGEN_DONT_INLINE void transform(const Transform& t, Data& data) +{ + data = (t * data.colwise().homogeneous()).template block(0,0); +} + +template struct get_dim { enum { Dim = T::Dim }; }; +template +struct get_dim > { enum { Dim = R }; }; + +template +struct bench_impl +{ + static EIGEN_DONT_INLINE void run(const Transformation& t) + { + Matrix::Dim,N> data; + data.setRandom(); + bench_impl::run(t); + BenchTimer timer; + BENCH(timer,10,100000,transform(t,data)); + cout.width(9); + cout << timer.best() << " "; + } +}; + + +template +struct bench_impl +{ + static EIGEN_DONT_INLINE void run(const Transformation&) {} +}; + +template +EIGEN_DONT_INLINE void bench(const std::string& msg, const Transformation& t) +{ + cout << msg << " "; + bench_impl::run(t); + std::cout << "\n"; +} + +int main(int argc, char ** argv) +{ + Matrix mat34; mat34.setRandom(); + Transform iso3(mat34); + Transform aff3(mat34); + Transform caff3(mat34); + Transform proj3(mat34); + Quaternion quat;quat.setIdentity(); + ToRotationMatrixWrapper > quatmat(quat); + Matrix mat33; mat33.setRandom(); + + cout.precision(4); + std::cout + << "N "; + for(int i=0;i +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +typedef Matrix Mat; + +EIGEN_DONT_INLINE +void gemm(const Mat &A, const Mat &B, Mat &C) +{ + C.noalias() += A * B; +} + +EIGEN_DONT_INLINE +double bench(long m, long n, long k) +{ + Mat A(m,k); + Mat B(k,n); + Mat C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e8*4/sizeof(Scalar); + double tm0 = 4, tm1 = 10; + if(NumTraits::IsComplex) + { + up /= 4; + tm0 = 2; + tm1 = 4; + } + + double flops = 2. 
* m * n * k; + long rep = std::max(1., std::min(100., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("gemm_settings.txt"); + long m, n, k; + while(settings >> m >> n >> k) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench(m, n, k) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/thirdparty/eigen/bench/perf_monitoring/gemm/gemm_settings.txt b/thirdparty/eigen/bench/perf_monitoring/gemm/gemm_settings.txt new file mode 100644 index 000000000..5c43e1c7d --- /dev/null +++ b/thirdparty/eigen/bench/perf_monitoring/gemm/gemm_settings.txt @@ -0,0 +1,15 @@ +8 8 8 +9 9 9 +24 24 24 +239 239 239 +240 240 240 +2400 24 24 +24 2400 24 +24 24 2400 +24 2400 2400 +2400 24 2400 +2400 2400 24 +2400 2400 64 +4800 23 160 +23 4800 160 +2400 2400 2400 diff --git a/thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp b/thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp new file mode 100644 index 000000000..6dc370155 --- /dev/null +++ b/thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +template +EIGEN_DONT_INLINE +void lazy_gemm(const MatA &A, const MatB &B, MatC &C) +{ +// escape((void*)A.data()); +// escape((void*)B.data()); + C.noalias() += A.lazyProduct(B); +// escape((void*)C.data()); +} + +template +EIGEN_DONT_INLINE +double bench() +{ + typedef Matrix MatA; + typedef Matrix MatB; + typedef Matrix MatC; + + MatA A(m,k); + MatB B(k,n); + MatC C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e7*4/sizeof(Scalar); + double tm0 = 10, tm1 
= 20; + + double flops = 2. * m * n * k; + long rep = std::max(10., std::min(10000., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, lazy_gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +double bench_t(int t) +{ + if(t) + return bench(); + else + return bench(); +} + +EIGEN_DONT_INLINE +double bench_mnk(int m, int n, int k, int t) +{ + int id = m*10000 + n*100 + k; + switch(id) { + case 10101 : return bench_t< 1, 1, 1>(t); break; + case 20202 : return bench_t< 2, 2, 2>(t); break; + case 30303 : return bench_t< 3, 3, 3>(t); break; + case 40404 : return bench_t< 4, 4, 4>(t); break; + case 50505 : return bench_t< 5, 5, 5>(t); break; + case 60606 : return bench_t< 6, 6, 6>(t); break; + case 70707 : return bench_t< 7, 7, 7>(t); break; + case 80808 : return bench_t< 8, 8, 8>(t); break; + case 90909 : return bench_t< 9, 9, 9>(t); break; + case 101010 : return bench_t<10,10,10>(t); break; + case 111111 : return bench_t<11,11,11>(t); break; + case 121212 : return bench_t<12,12,12>(t); break; + } + return 0; +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("lazy_gemm_settings.txt"); + long m, n, k, t; + while(settings >> m >> n >> k >> t) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench_mnk(m, n, k, t) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt b/thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt new file mode 100644 index 000000000..407d5d4fa --- /dev/null +++ b/thirdparty/eigen/bench/perf_monitoring/gemm/lazy_gemm_settings.txt @@ -0,0 +1,15 @@ +1 1 1 0 +2 2 2 0 +3 3 3 0 +4 4 4 0 +4 4 4 1 +5 5 5 0 +6 6 6 0 +7 7 7 0 +7 7 7 1 +8 8 8 0 +9 9 9 0 +10 10 10 0 +11 11 11 0 +12 12 12 0 +12 12 12 1 diff --git a/thirdparty/eigen/bench/perf_monitoring/gemm/make_plot.sh 
b/thirdparty/eigen/bench/perf_monitoring/gemm/make_plot.sh new file mode 100755 index 000000000..cd3214ac9 --- /dev/null +++ b/thirdparty/eigen/bench/perf_monitoring/gemm/make_plot.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# base name of the bench +# it reads $1.out +# and generates $1.pdf +WHAT=$1 +bench=$2 + +header="rev " +while read line +do + if [ ! -z '$line' ]; then + header="$header \"$line\"" + fi +done < $bench"_settings.txt" + +echo $header > $WHAT.out.header +cat $WHAT.out >> $WHAT.out.header + + +echo "set title '$WHAT'" > $WHAT.gnuplot +echo "set key autotitle columnhead outside " >> $WHAT.gnuplot +echo "set xtics rotate 1" >> $WHAT.gnuplot + +echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot +echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot + +col=`cat $bench"_settings.txt" | wc -l` +echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot +echo " " >> $WHAT.gnuplot + +gnuplot -persist < $WHAT.gnuplot + +# generate a png file +# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png + +# clean +rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file diff --git a/thirdparty/eigen/bench/perf_monitoring/gemm/run.sh b/thirdparty/eigen/bench/perf_monitoring/gemm/run.sh new file mode 100755 index 000000000..9d6ee40bc --- /dev/null +++ b/thirdparty/eigen/bench/perf_monitoring/gemm/run.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# ./run.sh gemm +# ./run.sh lazy_gemm + +# Examples of environment variables to be set: +# PREFIX="haswell-fma-" +# CXX_FLAGS="-mfma" + +# Options: +# -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests + +bench=$1 + +if echo "$*" | grep '\-up' > /dev/null; then + update=true +else + update=false +fi + +if echo "$*" | grep '\-s' > /dev/null; then + selected=true +else + 
selected=false +fi + +global_args="$*" + +if [ $selected == true ]; then + echo "Recompute selected changesets only and keep bests" +elif [ $update == true ]; then + echo "(Re-)Compute all changesets and keep bests" +else + echo "Skip previously computed changesets" +fi + + + +if [ ! -d "eigen_src" ]; then + hg clone https://bitbucket.org/eigen/eigen eigen_src +else + cd eigen_src + hg pull -u + cd .. +fi + +if [ ! -z '$CXX' ]; then + CXX=g++ +fi + +function make_backup +{ + if [ -f "$1.out" ]; then + mv "$1.out" "$1.backup" + fi +} + +function merge +{ + count1=`echo $1 | wc -w` + count2=`echo $2 | wc -w` + + if [ $count1 == $count2 ]; then + a=( $1 ); b=( $2 ) + res="" + for (( i=0 ; i<$count1 ; i++ )); do + ai=${a[$i]}; bi=${b[$i]} + tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l` + res="$res $tmp" + done + echo $res + + else + echo $1 + fi +} + +function test_current +{ + rev=$1 + scalar=$2 + name=$3 + + prev="" + if [ -e "$name.backup" ]; then + prev=`grep $rev "$name.backup" | cut -c 14-` + fi + res=$prev + count_rev=`echo $prev | wc -w` + count_ref=`cat $bench"_settings.txt" | wc -l` + if echo "$global_args" | grep "$rev" > /dev/null; then + rev_found=true + else + rev_found=false + fi +# echo $update et $selected et $rev_found because $rev et "$global_args" +# echo $count_rev et $count_ref + if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then + if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then + curr=`./$name` + if [ $count_rev == $count_ref ]; then + echo "merge previous $prev" + echo "with new $curr" + else + echo "got $curr" + fi + res=`merge "$curr" "$prev"` +# echo $res + echo "$rev $res" >> $name.out + else + echo "Compilation failed, skip rev $rev" + fi + else + echo "Skip existing results for $rev / $name" + echo "$rev $res" >> $name.out + fi +} + +make_backup $PREFIX"s"$bench +make_backup $PREFIX"d"$bench +make_backup $PREFIX"c"$bench 
+ +cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev +do + if [ ! -z '$rev' ]; then + echo "Testing rev $rev" + cd eigen_src + hg up -C $rev > /dev/null + actual_rev=`hg identify | cut -f1 -d' '` + cd .. + + test_current $actual_rev float $PREFIX"s"$bench + test_current $actual_rev double $PREFIX"d"$bench + test_current $actual_rev "std::complex" $PREFIX"c"$bench + fi + +done + +echo "Float:" +cat $PREFIX"s""$bench.out" +echo " " + +echo "Double:" +cat $PREFIX"d""$bench.out" +echo "" + +echo "Complex:" +cat $PREFIX"c""$bench.out" +echo "" + +./make_plot.sh $PREFIX"s"$bench $bench +./make_plot.sh $PREFIX"d"$bench $bench +./make_plot.sh $PREFIX"c"$bench $bench + + diff --git a/thirdparty/eigen/bench/product_threshold.cpp b/thirdparty/eigen/bench/product_threshold.cpp new file mode 100644 index 000000000..dd6d15a07 --- /dev/null +++ b/thirdparty/eigen/bench/product_threshold.cpp @@ -0,0 +1,143 @@ + +#include +#include +#include + +using namespace Eigen; +using namespace std; + +#define END 9 + +template struct map_size { enum { ret = S }; }; +template<> struct map_size<10> { enum { ret = 20 }; }; +template<> struct map_size<11> { enum { ret = 50 }; }; +template<> struct map_size<12> { enum { ret = 100 }; }; +template<> struct map_size<13> { enum { ret = 300 }; }; + +template struct alt_prod +{ + enum { + ret = M==1 && N==1 ? InnerProduct + : K==1 ? OuterProduct + : M==1 ? GemvProduct + : N==1 ? 
GemvProduct + : GemmProduct + }; +}; + +void print_mode(int mode) +{ + if(mode==InnerProduct) std::cout << "i"; + if(mode==OuterProduct) std::cout << "o"; + if(mode==CoeffBasedProductMode) std::cout << "c"; + if(mode==LazyCoeffBasedProductMode) std::cout << "l"; + if(mode==GemvProduct) std::cout << "v"; + if(mode==GemmProduct) std::cout << "m"; +} + +template +EIGEN_DONT_INLINE void prod(const Lhs& a, const Rhs& b, Res& c) +{ + c.noalias() += typename ProductReturnType::Type(a,b); +} + +template +EIGEN_DONT_INLINE void bench_prod() +{ + typedef Matrix Lhs; Lhs a; a.setRandom(); + typedef Matrix Rhs; Rhs b; b.setRandom(); + typedef Matrix Res; Res c; c.setRandom(); + + BenchTimer t; + double n = 2.*double(M)*double(N)*double(K); + int rep = 100000./n; + rep /= 2; + if(rep<1) rep = 1; + do { + rep *= 2; + t.reset(); + BENCH(t,1,rep,prod(a,b,c)); + } while(t.best()<0.1); + + t.reset(); + BENCH(t,5,rep,prod(a,b,c)); + + print_mode(Mode); + std::cout << int(1e-6*n*rep/t.best()) << "\t"; +} + +template struct print_n; +template struct loop_on_m; +template struct loop_on_n; + +template +struct loop_on_k +{ + static void run() + { + std::cout << "K=" << K << "\t"; + print_n::run(); + std::cout << "\n"; + + loop_on_m::run(); + std::cout << "\n\n"; + + loop_on_k::run(); + } +}; + +template +struct loop_on_k { static void run(){} }; + + +template +struct loop_on_m +{ + static void run() + { + std::cout << M << "f\t"; + loop_on_n::run(); + std::cout << "\n"; + + std::cout << M << "f\t"; + loop_on_n::run(); + std::cout << "\n"; + + loop_on_m::run(); + } +}; + +template +struct loop_on_m { static void run(){} }; + +template +struct loop_on_n +{ + static void run() + { + bench_prod::ret : Mode>(); + + loop_on_n::run(); + } +}; + +template +struct loop_on_n { static void run(){} }; + +template struct print_n +{ + static void run() + { + std::cout << map_size::ret << "\t"; + print_n::run(); + } +}; + +template<> struct print_n { static void run(){} }; + +int main() +{ + 
loop_on_k<1,1,1>::run(); + + return 0; +} diff --git a/thirdparty/eigen/bench/quat_slerp.cpp b/thirdparty/eigen/bench/quat_slerp.cpp new file mode 100644 index 000000000..bffb3bf11 --- /dev/null +++ b/thirdparty/eigen/bench/quat_slerp.cpp @@ -0,0 +1,247 @@ + +#include +#include +#include +using namespace Eigen; +using namespace std; + + + +template +EIGEN_DONT_INLINE Q nlerp(const Q& a, const Q& b, typename Q::Scalar t) +{ + return Q((a.coeffs() * (1.0-t) + b.coeffs() * t).normalized()); +} + +template +EIGEN_DONT_INLINE Q slerp_eigen(const Q& a, const Q& b, typename Q::Scalar t) +{ + return a.slerp(t,b); +} + +template +EIGEN_DONT_INLINE Q slerp_legacy(const Q& a, const Q& b, typename Q::Scalar t) +{ + typedef typename Q::Scalar Scalar; + static const Scalar one = Scalar(1) - dummy_precision(); + Scalar d = a.dot(b); + Scalar absD = internal::abs(d); + if (absD>=one) + return a; + + // theta is the angle between the 2 quaternions + Scalar theta = std::acos(absD); + Scalar sinTheta = internal::sin(theta); + + Scalar scale0 = internal::sin( ( Scalar(1) - t ) * theta) / sinTheta; + Scalar scale1 = internal::sin( ( t * theta) ) / sinTheta; + if (d<0) + scale1 = -scale1; + + return Q(scale0 * a.coeffs() + scale1 * b.coeffs()); +} + +template +EIGEN_DONT_INLINE Q slerp_legacy_nlerp(const Q& a, const Q& b, typename Q::Scalar t) +{ + typedef typename Q::Scalar Scalar; + static const Scalar one = Scalar(1) - epsilon(); + Scalar d = a.dot(b); + Scalar absD = internal::abs(d); + + Scalar scale0; + Scalar scale1; + + if (absD>=one) + { + scale0 = Scalar(1) - t; + scale1 = t; + } + else + { + // theta is the angle between the 2 quaternions + Scalar theta = std::acos(absD); + Scalar sinTheta = internal::sin(theta); + + scale0 = internal::sin( ( Scalar(1) - t ) * theta) / sinTheta; + scale1 = internal::sin( ( t * theta) ) / sinTheta; + if (d<0) + scale1 = -scale1; + } + + return Q(scale0 * a.coeffs() + scale1 * b.coeffs()); +} + +template +inline T sin_over_x(T x) +{ + if (T(1) 
+ x*x == T(1)) + return T(1); + else + return std::sin(x)/x; +} + +template +EIGEN_DONT_INLINE Q slerp_rw(const Q& a, const Q& b, typename Q::Scalar t) +{ + typedef typename Q::Scalar Scalar; + + Scalar d = a.dot(b); + Scalar theta; + if (d<0.0) + theta = /*M_PI -*/ Scalar(2)*std::asin( (a.coeffs()+b.coeffs()).norm()/2 ); + else + theta = Scalar(2)*std::asin( (a.coeffs()-b.coeffs()).norm()/2 ); + + // theta is the angle between the 2 quaternions +// Scalar theta = std::acos(absD); + Scalar sinOverTheta = sin_over_x(theta); + + Scalar scale0 = (Scalar(1)-t)*sin_over_x( ( Scalar(1) - t ) * theta) / sinOverTheta; + Scalar scale1 = t * sin_over_x( ( t * theta) ) / sinOverTheta; + if (d<0) + scale1 = -scale1; + + return Quaternion(scale0 * a.coeffs() + scale1 * b.coeffs()); +} + +template +EIGEN_DONT_INLINE Q slerp_gael(const Q& a, const Q& b, typename Q::Scalar t) +{ + typedef typename Q::Scalar Scalar; + + Scalar d = a.dot(b); + Scalar theta; +// theta = Scalar(2) * atan2((a.coeffs()-b.coeffs()).norm(),(a.coeffs()+b.coeffs()).norm()); +// if (d<0.0) +// theta = M_PI-theta; + + if (d<0.0) + theta = /*M_PI -*/ Scalar(2)*std::asin( (-a.coeffs()-b.coeffs()).norm()/2 ); + else + theta = Scalar(2)*std::asin( (a.coeffs()-b.coeffs()).norm()/2 ); + + + Scalar scale0; + Scalar scale1; + if(theta*theta-Scalar(6)==-Scalar(6)) + { + scale0 = Scalar(1) - t; + scale1 = t; + } + else + { + Scalar sinTheta = std::sin(theta); + scale0 = internal::sin( ( Scalar(1) - t ) * theta) / sinTheta; + scale1 = internal::sin( ( t * theta) ) / sinTheta; + if (d<0) + scale1 = -scale1; + } + + return Quaternion(scale0 * a.coeffs() + scale1 * b.coeffs()); +} + +int main() +{ + typedef double RefScalar; + typedef float TestScalar; + + typedef Quaternion Qd; + typedef Quaternion Qf; + + unsigned int g_seed = (unsigned int) time(NULL); + std::cout << g_seed << "\n"; +// g_seed = 1259932496; + srand(g_seed); + + Matrix maxerr(7); + maxerr.setZero(); + + Matrix avgerr(7); + avgerr.setZero(); + + cout << 
"double=>float=>double nlerp eigen legacy(snap) legacy(nlerp) rightway gael's criteria\n"; + + int rep = 100; + int iters = 40; + for (int w=0; w()); + Qd br(b.cast()); + Qd cr; + + + + cout.precision(8); + cout << std::scientific; + for (int i=0; i(); + c[0] = nlerp(a,b,t); + c[1] = slerp_eigen(a,b,t); + c[2] = slerp_legacy(a,b,t); + c[3] = slerp_legacy_nlerp(a,b,t); + c[4] = slerp_rw(a,b,t); + c[5] = slerp_gael(a,b,t); + + VectorXd err(7); + err[0] = (cr.coeffs()-refc.cast().coeffs()).norm(); +// std::cout << err[0] << " "; + for (int k=0; k<6; ++k) + { + err[k+1] = (c[k].coeffs()-refc.coeffs()).norm(); +// std::cout << err[k+1] << " "; + } + maxerr = maxerr.cwise().max(err); + avgerr += err; +// std::cout << "\n"; + b = cr.cast(); + br = cr; + } +// std::cout << "\n"; + } + avgerr /= RefScalar(rep*iters); + cout << "\n\nAccuracy:\n" + << " max: " << maxerr.transpose() << "\n"; + cout << " avg: " << avgerr.transpose() << "\n"; + + // perf bench + Quaternionf a,b; + a.coeffs().setRandom(); + a.normalize(); + b.coeffs().setRandom(); + b.normalize(); + //b = a; + float s = 0.65; + + #define BENCH(FUNC) {\ + BenchTimer t; \ + for(int k=0; k<2; ++k) {\ + t.start(); \ + for(int i=0; i<1000000; ++i) \ + FUNC(a,b,s); \ + t.stop(); \ + } \ + cout << " " << #FUNC << " => \t " << t.value() << "s\n"; \ + } + + cout << "\nSpeed:\n" << std::fixed; + BENCH(nlerp); + BENCH(slerp_eigen); + BENCH(slerp_legacy); + BENCH(slerp_legacy_nlerp); + BENCH(slerp_rw); + BENCH(slerp_gael); +} + diff --git a/thirdparty/eigen/bench/quatmul.cpp b/thirdparty/eigen/bench/quatmul.cpp new file mode 100644 index 000000000..8d9d7922c --- /dev/null +++ b/thirdparty/eigen/bench/quatmul.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +using namespace Eigen; + +template +EIGEN_DONT_INLINE void quatmul_default(const Quat& a, const Quat& b, Quat& c) +{ + c = a * b; +} + +template +EIGEN_DONT_INLINE void quatmul_novec(const Quat& a, const Quat& b, Quat& c) +{ + c = internal::quat_product<0, 
Quat, Quat, typename Quat::Scalar, Aligned>::run(a,b); +} + +template void bench(const std::string& label) +{ + int tries = 10; + int rep = 1000000; + BenchTimer t; + + Quat a(4, 1, 2, 3); + Quat b(2, 3, 4, 5); + Quat c; + + std::cout.precision(3); + + BENCH(t, tries, rep, quatmul_default(a,b,c)); + std::cout << label << " default " << 1e3*t.best(CPU_TIMER) << "ms \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n"; + + BENCH(t, tries, rep, quatmul_novec(a,b,c)); + std::cout << label << " novec " << 1e3*t.best(CPU_TIMER) << "ms \t" << 1e-6*double(rep)/(t.best(CPU_TIMER)) << " M mul/s\n"; +} + +int main() +{ + bench("float "); + bench("double"); + + return 0; + +} + diff --git a/thirdparty/eigen/bench/sparse_cholesky.cpp b/thirdparty/eigen/bench/sparse_cholesky.cpp new file mode 100644 index 000000000..ecb226786 --- /dev/null +++ b/thirdparty/eigen/bench/sparse_cholesky.cpp @@ -0,0 +1,216 @@ +// #define EIGEN_TAUCS_SUPPORT +// #define EIGEN_CHOLMOD_SUPPORT +#include +#include + +// g++ -DSIZE=10000 -DDENSITY=0.001 sparse_cholesky.cpp -I.. 
-DDENSEMATRI -O3 -g0 -DNDEBUG -DNBTRIES=1 -I /home/gael/Coding/LinearAlgebra/taucs_full/src/ -I/home/gael/Coding/LinearAlgebra/taucs_full/build/linux/ -L/home/gael/Coding/LinearAlgebra/taucs_full/lib/linux/ -ltaucs /home/gael/Coding/LinearAlgebra/GotoBLAS/libgoto.a -lpthread -I /home/gael/Coding/LinearAlgebra/SuiteSparse/CHOLMOD/Include/ $CHOLLIB -I /home/gael/Coding/LinearAlgebra/SuiteSparse/UFconfig/ /home/gael/Coding/LinearAlgebra/SuiteSparse/CCOLAMD/Lib/libccolamd.a /home/gael/Coding/LinearAlgebra/SuiteSparse/CHOLMOD/Lib/libcholmod.a -lmetis /home/gael/Coding/LinearAlgebra/SuiteSparse/AMD/Lib/libamd.a /home/gael/Coding/LinearAlgebra/SuiteSparse/CAMD/Lib/libcamd.a /home/gael/Coding/LinearAlgebra/SuiteSparse/CCOLAMD/Lib/libccolamd.a /home/gael/Coding/LinearAlgebra/SuiteSparse/COLAMD/Lib/libcolamd.a -llapack && ./a.out + +#define NOGMM +#define NOMTL + +#ifndef SIZE +#define SIZE 10 +#endif + +#ifndef DENSITY +#define DENSITY 0.01 +#endif + +#ifndef REPEAT +#define REPEAT 1 +#endif + +#include "BenchSparseUtil.h" + +#ifndef MINDENSITY +#define MINDENSITY 0.0004 +#endif + +#ifndef NBTRIES +#define NBTRIES 10 +#endif + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j EigenSparseTriMatrix; +typedef SparseMatrix EigenSparseSelfAdjointMatrix; + +void fillSpdMatrix(float density, int rows, int cols, EigenSparseSelfAdjointMatrix& dst) +{ + dst.startFill(rows*cols*density); + for(int j = 0; j < cols; j++) + { + dst.fill(j,j) = internal::random(10,20); + for(int i = j+1; i < rows; i++) + { + Scalar v = (internal::random(0,1) < density) ? internal::random() : 0; + if (v!=0) + dst.fill(i,j) = v; + } + + } + dst.endFill(); +} + +#include + +template +void doEigen(const char* name, const EigenSparseSelfAdjointMatrix& sm1, int flags = 0) +{ + std::cout << name << "..." 
<< std::flush; + BenchTimer timer; + timer.start(); + SparseLLT chol(sm1, flags); + timer.stop(); + std::cout << ":\t" << timer.value() << endl; + + std::cout << " nnz: " << sm1.nonZeros() << " => " << chol.matrixL().nonZeros() << "\n"; +// std::cout << "sparse\n" << chol.matrixL() << "%\n"; +} + +int main(int argc, char *argv[]) +{ + int rows = SIZE; + int cols = SIZE; + float density = DENSITY; + BenchTimer timer; + + VectorXf b = VectorXf::Random(cols); + VectorXf x = VectorXf::Random(cols); + + bool densedone = false; + + //for (float density = DENSITY; density>=MINDENSITY; density*=0.5) +// float density = 0.5; + { + EigenSparseSelfAdjointMatrix sm1(rows, cols); + std::cout << "Generate sparse matrix (might take a while)...\n"; + fillSpdMatrix(density, rows, cols, sm1); + std::cout << "DONE\n\n"; + + // dense matrices + #ifdef DENSEMATRIX + if (!densedone) + { + densedone = true; + std::cout << "Eigen Dense\t" << density*100 << "%\n"; + DenseMatrix m1(rows,cols); + eiToDense(sm1, m1); + m1 = (m1 + m1.transpose()).eval(); + m1.diagonal() *= 0.5; + +// BENCH(LLT chol(m1);) +// std::cout << "dense:\t" << timer.value() << endl; + + BenchTimer timer; + timer.start(); + LLT chol(m1); + timer.stop(); + std::cout << "dense:\t" << timer.value() << endl; + int count = 0; + for (int j=0; j("Eigen/Sparse", sm1, Eigen::IncompleteFactorization); + + #ifdef EIGEN_CHOLMOD_SUPPORT + doEigen("Eigen/Cholmod", sm1, Eigen::IncompleteFactorization); + #endif + + #ifdef EIGEN_TAUCS_SUPPORT + doEigen("Eigen/Taucs", sm1, Eigen::IncompleteFactorization); + #endif + + #if 0 + // TAUCS + { + taucs_ccs_matrix A = sm1.asTaucsMatrix(); + + //BENCH(taucs_ccs_matrix* chol = taucs_ccs_factor_llt(&A, 0, 0);) +// BENCH(taucs_supernodal_factor_to_ccs(taucs_ccs_factor_llt_ll(&A));) +// std::cout << "taucs:\t" << timer.value() << endl; + + taucs_ccs_matrix* chol = taucs_ccs_factor_llt(&A, 0, 0); + + for (int j=0; jcolptr[j]; icolptr[j+1]; ++i) + std::cout << chol->values.d[i] << " "; + } + } + + // 
CHOLMOD + #ifdef EIGEN_CHOLMOD_SUPPORT + { + cholmod_common c; + cholmod_start (&c); + cholmod_sparse A; + cholmod_factor *L; + + A = sm1.asCholmodMatrix(); + BenchTimer timer; +// timer.reset(); + timer.start(); + std::vector perm(cols); +// std::vector set(ncols); + for (int i=0; icolptr[j]; icolptr[j+1]; ++i) +// std::cout << chol->values.s[i] << " "; +// } + } + #endif + + #endif + + + + } + + + return 0; +} + diff --git a/thirdparty/eigen/bench/sparse_dense_product.cpp b/thirdparty/eigen/bench/sparse_dense_product.cpp new file mode 100644 index 000000000..f3f519406 --- /dev/null +++ b/thirdparty/eigen/bench/sparse_dense_product.cpp @@ -0,0 +1,187 @@ + +//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out +//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.05 -DSIZE=2000 && ./a.out +// -DNOGMM -DNOMTL -DCSPARSE +// -I /home/gael/Coding/LinearAlgebra/CSparse/Include/ /home/gael/Coding/LinearAlgebra/CSparse/Lib/libcsparse.a +#ifndef SIZE +#define SIZE 650000 +#endif + +#ifndef DENSITY +#define DENSITY 0.01 +#endif + +#ifndef REPEAT +#define REPEAT 1 +#endif + +#include "BenchSparseUtil.h" + +#ifndef MINDENSITY +#define MINDENSITY 0.0004 +#endif + +#ifndef NBTRIES +#define NBTRIES 10 +#endif + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j=MINDENSITY; density*=0.5) + { + //fillMatrix(density, rows, cols, sm1); + fillMatrix2(7, rows, cols, sm1); + + // dense matrices + #ifdef DENSEMATRIX + { + std::cout << "Eigen Dense\t" << density*100 << "%\n"; + DenseMatrix m1(rows,cols); + eiToDense(sm1, m1); + + timer.reset(); + timer.start(); + for (int k=0; k m1(sm1); +// std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/float(m1.rows()*m1.cols())*100 << "%\n"; +// +// BENCH(for (int k=0; k gmmV1(cols), gmmV2(cols); + Map >(&gmmV1[0], cols) = v1; + Map >(&gmmV2[0], cols) = v2; + + BENCH( asm("#myx"); gmm::mult(m1, gmmV1, gmmV2); asm("#myy"); 
) + std::cout << " a * v:\t" << timer.value() << endl; + + BENCH( gmm::mult(gmm::transposed(m1), gmmV1, gmmV2); ) + std::cout << " a' * v:\t" << timer.value() << endl; + } + #endif + + #ifndef NOUBLAS + { + std::cout << "ublas sparse\t" << density*100 << "%\n"; + UBlasSparse m1(rows,cols); + eiToUblas(sm1, m1); + + boost::numeric::ublas::vector uv1, uv2; + eiToUblasVec(v1,uv1); + eiToUblasVec(v2,uv2); + +// std::vector gmmV1(cols), gmmV2(cols); +// Map >(&gmmV1[0], cols) = v1; +// Map >(&gmmV2[0], cols) = v2; + + BENCH( uv2 = boost::numeric::ublas::prod(m1, uv1); ) + std::cout << " a * v:\t" << timer.value() << endl; + +// BENCH( boost::ublas::prod(gmm::transposed(m1), gmmV1, gmmV2); ) +// std::cout << " a' * v:\t" << timer.value() << endl; + } + #endif + + // MTL4 + #ifndef NOMTL + { + std::cout << "MTL4\t" << density*100 << "%\n"; + MtlSparse m1(rows,cols); + eiToMtl(sm1, m1); + mtl::dense_vector mtlV1(cols, 1.0); + mtl::dense_vector mtlV2(cols, 1.0); + + timer.reset(); + timer.start(); + for (int k=0; k + +#define NOGMM +#define NOMTL + +#ifndef SIZE +#define SIZE 10 +#endif + +#ifndef DENSITY +#define DENSITY 0.01 +#endif + +#ifndef REPEAT +#define REPEAT 1 +#endif + +#include "BenchSparseUtil.h" + +#ifndef MINDENSITY +#define MINDENSITY 0.0004 +#endif + +#ifndef NBTRIES +#define NBTRIES 10 +#endif + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j VectorX; + +#include + +template +void doEigen(const char* name, const EigenSparseMatrix& sm1, const VectorX& b, VectorX& x, int flags = 0) +{ + std::cout << name << "..." 
<< std::flush; + BenchTimer timer; timer.start(); + SparseLU lu(sm1, flags); + timer.stop(); + if (lu.succeeded()) + std::cout << ":\t" << timer.value() << endl; + else + { + std::cout << ":\t FAILED" << endl; + return; + } + + bool ok; + timer.reset(); timer.start(); + ok = lu.solve(b,&x); + timer.stop(); + if (ok) + std::cout << " solve:\t" << timer.value() << endl; + else + std::cout << " solve:\t" << " FAILED" << endl; + + //std::cout << x.transpose() << "\n"; +} + +int main(int argc, char *argv[]) +{ + int rows = SIZE; + int cols = SIZE; + float density = DENSITY; + BenchTimer timer; + + VectorX b = VectorX::Random(cols); + VectorX x = VectorX::Random(cols); + + bool densedone = false; + + //for (float density = DENSITY; density>=MINDENSITY; density*=0.5) +// float density = 0.5; + { + EigenSparseMatrix sm1(rows, cols); + fillMatrix(density, rows, cols, sm1); + + // dense matrices + #ifdef DENSEMATRIX + if (!densedone) + { + densedone = true; + std::cout << "Eigen Dense\t" << density*100 << "%\n"; + DenseMatrix m1(rows,cols); + eiToDense(sm1, m1); + + BenchTimer timer; + timer.start(); + FullPivLU lu(m1); + timer.stop(); + std::cout << "Eigen/dense:\t" << timer.value() << endl; + + timer.reset(); + timer.start(); + lu.solve(b,&x); + timer.stop(); + std::cout << " solve:\t" << timer.value() << endl; +// std::cout << b.transpose() << "\n"; +// std::cout << x.transpose() << "\n"; + } + #endif + + #ifdef EIGEN_UMFPACK_SUPPORT + x.setZero(); + doEigen("Eigen/UmfPack (auto)", sm1, b, x, 0); + #endif + + #ifdef EIGEN_SUPERLU_SUPPORT + x.setZero(); + doEigen("Eigen/SuperLU (nat)", sm1, b, x, Eigen::NaturalOrdering); +// doEigen("Eigen/SuperLU (MD AT+A)", sm1, b, x, Eigen::MinimumDegree_AT_PLUS_A); +// doEigen("Eigen/SuperLU (MD ATA)", sm1, b, x, Eigen::MinimumDegree_ATA); + doEigen("Eigen/SuperLU (COLAMD)", sm1, b, x, Eigen::ColApproxMinimumDegree); + #endif + + } + + return 0; +} + diff --git a/thirdparty/eigen/bench/sparse_product.cpp 
b/thirdparty/eigen/bench/sparse_product.cpp new file mode 100644 index 000000000..d2fc44f0d --- /dev/null +++ b/thirdparty/eigen/bench/sparse_product.cpp @@ -0,0 +1,323 @@ + +//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out +//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.05 -DSIZE=2000 && ./a.out +// -DNOGMM -DNOMTL -DCSPARSE +// -I /home/gael/Coding/LinearAlgebra/CSparse/Include/ /home/gael/Coding/LinearAlgebra/CSparse/Lib/libcsparse.a + +#include + +#ifndef SIZE +#define SIZE 1000000 +#endif + +#ifndef NNZPERCOL +#define NNZPERCOL 6 +#endif + +#ifndef REPEAT +#define REPEAT 1 +#endif + +#include +#include "BenchTimer.h" +#include "BenchUtil.h" +#include "BenchSparseUtil.h" + +#ifndef NBTRIES +#define NBTRIES 1 +#endif + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j +// void mkl_multiply(const Lhs& lhs, const Rhs& rhs, Res& res) +// { +// char n = 'N'; +// float alpha = 1; +// char matdescra[6]; +// matdescra[0] = 'G'; +// matdescra[1] = 0; +// matdescra[2] = 0; +// matdescra[3] = 'C'; +// mkl_scscmm(&n, lhs.rows(), rhs.cols(), lhs.cols(), &alpha, matdescra, +// lhs._valuePtr(), lhs._innerIndexPtr(), lhs.outerIndexPtr(), +// pntre, b, &ldb, &beta, c, &ldc); +// // mkl_somatcopy('C', 'T', lhs.rows(), lhs.cols(), 1, +// // lhs._valuePtr(), lhs.rows(), DST, dst_stride); +// } +// +// #endif + + +#ifdef CSPARSE +cs* cs_sorted_multiply(const cs* a, const cs* b) +{ +// return cs_multiply(a,b); + + cs* A = cs_transpose(a, 1); + cs* B = cs_transpose(b, 1); + cs* D = cs_multiply(B,A); /* D = B'*A' */ + cs_spfree (A) ; + cs_spfree (B) ; + cs_dropzeros (D) ; /* drop zeros from D */ + cs* C = cs_transpose (D, 1) ; /* C = D', so that C is sorted */ + cs_spfree (D) ; + return C; + +// cs* A = cs_transpose(a, 1); +// cs* C = cs_transpose(A, 1); +// return C; +} + +cs* cs_sorted_multiply2(const cs* a, const cs* b) +{ + cs* D = 
cs_multiply(a,b); + cs* E = cs_transpose(D,1); + cs_spfree(D); + cs* C = cs_transpose(E,1); + cs_spfree(E); + return C; +} +#endif + +void bench_sort(); + +int main(int argc, char *argv[]) +{ +// bench_sort(); + + int rows = SIZE; + int cols = SIZE; + float density = DENSITY; + + EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols); + + BenchTimer timer; + for (int nnzPerCol = NNZPERCOL; nnzPerCol>1; nnzPerCol/=1.1) + { + sm1.setZero(); + sm2.setZero(); + fillMatrix2(nnzPerCol, rows, cols, sm1); + fillMatrix2(nnzPerCol, rows, cols, sm2); +// std::cerr << "filling OK\n"; + + // dense matrices + #ifdef DENSEMATRIX + { + std::cout << "Eigen Dense\t" << nnzPerCol << "%\n"; + DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols); + eiToDense(sm1, m1); + eiToDense(sm2, m2); + + timer.reset(); + timer.start(); + for (int k=0; k m1(sm1), m2(sm2), m3(sm3); + std::cout << "Eigen dyn-sparse\t" << m1.nonZeros()/(float(m1.rows())*float(m1.cols()))*100 << "% * " + << m2.nonZeros()/(float(m2.rows())*float(m2.cols()))*100 << "%\n"; + +// timer.reset(); +// timer.start(); + BENCH(for (int k=0; k +#include +#include +#include + +#ifndef SIZE +#define SIZE 10000 +#endif + +#ifndef DENSITY +#define DENSITY 0.01 +#endif + +#ifndef REPEAT +#define REPEAT 1 +#endif + +#include "BenchSparseUtil.h" + +#ifndef MINDENSITY +#define MINDENSITY 0.0004 +#endif + +#ifndef NBTRIES +#define NBTRIES 10 +#endif + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j +void dostuff(const char* name, EigenSparseMatrix& sm1) +{ + int rows = sm1.rows(); + int cols = sm1.cols(); + sm1.setZero(); + BenchTimer t; + SetterType* set1 = new SetterType(sm1); + t.reset(); t.start(); + for (int k=0; k(0,rows-1),internal::random(0,cols-1)) += 1; + t.stop(); + std::cout << "std::map => \t" << t.value()-rtime + << " nnz=" << set1->nonZeros() << std::flush; + + // getchar(); + + t.reset(); t.start(); delete set1; t.stop(); + std::cout << " back: \t" << t.value() << "\n"; +} + +int 
main(int argc, char *argv[]) +{ + int rows = SIZE; + int cols = SIZE; + float density = DENSITY; + + EigenSparseMatrix sm1(rows,cols), sm2(rows,cols); + + + nentries = rows*cols*density; + std::cout << "n = " << nentries << "\n"; + int dummy; + BenchTimer t; + + t.reset(); t.start(); + for (int k=0; k(0,rows-1) + internal::random(0,cols-1); + t.stop(); + rtime = t.value(); + std::cout << "rtime = " << rtime << " (" << dummy << ")\n\n"; + const int Bits = 6; + for (;;) + { + dostuff >("std::map ", sm1); + dostuff >("gnu::hash_map", sm1); + dostuff >("google::dense", sm1); + dostuff >("google::sparse", sm1); + +// { +// RandomSetter set1(sm1); +// t.reset(); t.start(); +// for (int k=0; k(0,rows-1),internal::random(0,cols-1)) += 1; +// t.stop(); +// std::cout << "gnu::hash_map => \t" << t.value()-rtime +// << " nnz=" << set1.nonZeros() << "\n";getchar(); +// } +// { +// RandomSetter set1(sm1); +// t.reset(); t.start(); +// for (int k=0; k(0,rows-1),internal::random(0,cols-1)) += 1; +// t.stop(); +// std::cout << "google::dense => \t" << t.value()-rtime +// << " nnz=" << set1.nonZeros() << "\n";getchar(); +// } +// { +// RandomSetter set1(sm1); +// t.reset(); t.start(); +// for (int k=0; k(0,rows-1),internal::random(0,cols-1)) += 1; +// t.stop(); +// std::cout << "google::sparse => \t" << t.value()-rtime +// << " nnz=" << set1.nonZeros() << "\n";getchar(); +// } + std::cout << "\n\n"; + } + + return 0; +} + diff --git a/thirdparty/eigen/bench/sparse_setter.cpp b/thirdparty/eigen/bench/sparse_setter.cpp new file mode 100644 index 000000000..a9f0b11cc --- /dev/null +++ b/thirdparty/eigen/bench/sparse_setter.cpp @@ -0,0 +1,485 @@ + +//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out +//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. 
-I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.05 -DSIZE=2000 && ./a.out +// -DNOGMM -DNOMTL -DCSPARSE +// -I /home/gael/Coding/LinearAlgebra/CSparse/Include/ /home/gael/Coding/LinearAlgebra/CSparse/Lib/libcsparse.a +#ifndef SIZE +#define SIZE 100000 +#endif + +#ifndef NBPERROW +#define NBPERROW 24 +#endif + +#ifndef REPEAT +#define REPEAT 2 +#endif + +#ifndef NBTRIES +#define NBTRIES 2 +#endif + +#ifndef KK +#define KK 10 +#endif + +#ifndef NOGOOGLE +#define EIGEN_GOOGLEHASH_SUPPORT +#include +#endif + +#include "BenchSparseUtil.h" + +#define CHECK_MEM +// #define CHECK_MEM std/**/::cout << "check mem\n"; getchar(); + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j Coordinates; +typedef std::vector Values; + +EIGEN_DONT_INLINE Scalar* setinnerrand_eigen(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_eigen_dynamic(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_eigen_compact(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_eigen_sumeq(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_eigen_gnu_hash(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_eigen_google_dense(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_eigen_google_sparse(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_scipy(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_ublas_mapped(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_ublas_coord(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_ublas_compressed(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_ublas_genvec(const Coordinates& coords, const Values& vals); +EIGEN_DONT_INLINE Scalar* setrand_mtl(const Coordinates& coords, const Values& vals); + +int main(int 
argc, char *argv[]) +{ + int rows = SIZE; + int cols = SIZE; + bool fullyrand = true; + + BenchTimer timer; + Coordinates coords; + Values values; + if(fullyrand) + { + Coordinates pool; + pool.reserve(cols*NBPERROW); + std::cerr << "fill pool" << "\n"; + for (int i=0; i stencil(SIZE,SIZE); + Vector2i ij(internal::random(0,rows-1),internal::random(0,cols-1)); +// if(stencil.coeffRef(ij.x(), ij.y())==0) + { +// stencil.coeffRef(ij.x(), ij.y()) = 1; + pool.push_back(ij); + + } + ++i; + } + std::cerr << "pool ok" << "\n"; + int n = cols*NBPERROW*KK; + coords.reserve(n); + values.reserve(n); + for (int i=0; i(0,pool.size()); + coords.push_back(pool[i]); + values.push_back(internal::random()); + } + } + else + { + for (int j=0; j(0,rows-1),j)); + values.push_back(internal::random()); + } + } + std::cout << "nnz = " << coords.size() << "\n"; + CHECK_MEM + + // dense matrices + #ifdef DENSEMATRIX + { + BENCH(setrand_eigen_dense(coords,values);) + std::cout << "Eigen Dense\t" << timer.value() << "\n"; + } + #endif + + // eigen sparse matrices +// if (!fullyrand) +// { +// BENCH(setinnerrand_eigen(coords,values);) +// std::cout << "Eigen fillrand\t" << timer.value() << "\n"; +// } + { + BENCH(setrand_eigen_dynamic(coords,values);) + std::cout << "Eigen dynamic\t" << timer.value() << "\n"; + } +// { +// BENCH(setrand_eigen_compact(coords,values);) +// std::cout << "Eigen compact\t" << timer.value() << "\n"; +// } + { + BENCH(setrand_eigen_sumeq(coords,values);) + std::cout << "Eigen sumeq\t" << timer.value() << "\n"; + } + { +// BENCH(setrand_eigen_gnu_hash(coords,values);) +// std::cout << "Eigen std::map\t" << timer.value() << "\n"; + } + { + BENCH(setrand_scipy(coords,values);) + std::cout << "scipy\t" << timer.value() << "\n"; + } + #ifndef NOGOOGLE + { + BENCH(setrand_eigen_google_dense(coords,values);) + std::cout << "Eigen google dense\t" << timer.value() << "\n"; + } + { + BENCH(setrand_eigen_google_sparse(coords,values);) + std::cout << "Eigen google sparse\t" << 
timer.value() << "\n"; + } + #endif + + #ifndef NOUBLAS + { +// BENCH(setrand_ublas_mapped(coords,values);) +// std::cout << "ublas mapped\t" << timer.value() << "\n"; + } + { + BENCH(setrand_ublas_genvec(coords,values);) + std::cout << "ublas vecofvec\t" << timer.value() << "\n"; + } + /*{ + timer.reset(); + timer.start(); + for (int k=0; k mat(SIZE,SIZE); + //mat.startFill(2000000/*coords.size()*/); + for (int i=0; i mat(SIZE,SIZE); + mat.reserve(coords.size()/10); + for (int i=0; i mat(SIZE,SIZE); + for (int j=0; j aux(SIZE,SIZE); + mat.reserve(n); + for (int i=j*n; i<(j+1)*n; ++i) + { + aux.insert(coords[i].x(), coords[i].y()) += vals[i]; + } + aux.finalize(); + mat += aux; + } + return &mat.coeffRef(coords[0].x(), coords[0].y()); +} + +EIGEN_DONT_INLINE Scalar* setrand_eigen_compact(const Coordinates& coords, const Values& vals) +{ + using namespace Eigen; + DynamicSparseMatrix setter(SIZE,SIZE); + setter.reserve(coords.size()/10); + for (int i=0; i mat = setter; + CHECK_MEM; + return &mat.coeffRef(coords[0].x(), coords[0].y()); +} + +EIGEN_DONT_INLINE Scalar* setrand_eigen_gnu_hash(const Coordinates& coords, const Values& vals) +{ + using namespace Eigen; + SparseMatrix mat(SIZE,SIZE); + { + RandomSetter, StdMapTraits > setter(mat); + for (int i=0; i mat(SIZE,SIZE); + { + RandomSetter, GoogleDenseHashMapTraits> setter(mat); + for (int i=0; i mat(SIZE,SIZE); + { + RandomSetter, GoogleSparseHashMapTraits> setter(mat); + for (int i=0; i +void coo_tocsr(const int n_row, + const int n_col, + const int nnz, + const Coordinates Aij, + const Values Ax, + int Bp[], + int Bj[], + T Bx[]) +{ + //compute number of non-zero entries per row of A coo_tocsr + std::fill(Bp, Bp + n_row, 0); + + for (int n = 0; n < nnz; n++){ + Bp[Aij[n].x()]++; + } + + //cumsum the nnz per row to get Bp[] + for(int i = 0, cumsum = 0; i < n_row; i++){ + int temp = Bp[i]; + Bp[i] = cumsum; + cumsum += temp; + } + Bp[n_row] = nnz; + + //write Aj,Ax into Bj,Bx + for(int n = 0; n < nnz; n++){ + int 
row = Aij[n].x(); + int dest = Bp[row]; + + Bj[dest] = Aij[n].y(); + Bx[dest] = Ax[n]; + + Bp[row]++; + } + + for(int i = 0, last = 0; i <= n_row; i++){ + int temp = Bp[i]; + Bp[i] = last; + last = temp; + } + + //now Bp,Bj,Bx form a CSR representation (with possible duplicates) +} + +template< class T1, class T2 > +bool kv_pair_less(const std::pair& x, const std::pair& y){ + return x.first < y.first; +} + + +template +void csr_sort_indices(const I n_row, + const I Ap[], + I Aj[], + T Ax[]) +{ + std::vector< std::pair > temp; + + for(I i = 0; i < n_row; i++){ + I row_start = Ap[i]; + I row_end = Ap[i+1]; + + temp.clear(); + + for(I jj = row_start; jj < row_end; jj++){ + temp.push_back(std::make_pair(Aj[jj],Ax[jj])); + } + + std::sort(temp.begin(),temp.end(),kv_pair_less); + + for(I jj = row_start, n = 0; jj < row_end; jj++, n++){ + Aj[jj] = temp[n].first; + Ax[jj] = temp[n].second; + } + } +} + +template +void csr_sum_duplicates(const I n_row, + const I n_col, + I Ap[], + I Aj[], + T Ax[]) +{ + I nnz = 0; + I row_end = 0; + for(I i = 0; i < n_row; i++){ + I jj = row_end; + row_end = Ap[i+1]; + while( jj < row_end ){ + I j = Aj[jj]; + T x = Ax[jj]; + jj++; + while( jj < row_end && Aj[jj] == j ){ + x += Ax[jj]; + jj++; + } + Aj[nnz] = j; + Ax[nnz] = x; + nnz++; + } + Ap[i+1] = nnz; + } +} + +EIGEN_DONT_INLINE Scalar* setrand_scipy(const Coordinates& coords, const Values& vals) +{ + using namespace Eigen; + SparseMatrix mat(SIZE,SIZE); + mat.resizeNonZeros(coords.size()); +// std::cerr << "setrand_scipy...\n"; + coo_tocsr(SIZE,SIZE, coords.size(), coords, vals, mat._outerIndexPtr(), mat._innerIndexPtr(), mat._valuePtr()); +// std::cerr << "coo_tocsr ok\n"; + + csr_sort_indices(SIZE, mat._outerIndexPtr(), mat._innerIndexPtr(), mat._valuePtr()); + + csr_sum_duplicates(SIZE, SIZE, mat._outerIndexPtr(), mat._innerIndexPtr(), mat._valuePtr()); + + mat.resizeNonZeros(mat._outerIndexPtr()[SIZE]); + + return &mat.coeffRef(coords[0].x(), coords[0].y()); +} + + +#ifndef NOUBLAS 
+EIGEN_DONT_INLINE Scalar* setrand_ublas_mapped(const Coordinates& coords, const Values& vals) +{ + using namespace boost; + using namespace boost::numeric; + using namespace boost::numeric::ublas; + mapped_matrix aux(SIZE,SIZE); + for (int i=0; i mat(aux); + return 0;// &mat(coords[0].x(), coords[0].y()); +} +/*EIGEN_DONT_INLINE Scalar* setrand_ublas_coord(const Coordinates& coords, const Values& vals) +{ + using namespace boost; + using namespace boost::numeric; + using namespace boost::numeric::ublas; + coordinate_matrix aux(SIZE,SIZE); + for (int i=0; i mat(aux); + return 0;//&mat(coords[0].x(), coords[0].y()); +} +EIGEN_DONT_INLINE Scalar* setrand_ublas_compressed(const Coordinates& coords, const Values& vals) +{ + using namespace boost; + using namespace boost::numeric; + using namespace boost::numeric::ublas; + compressed_matrix mat(SIZE,SIZE); + for (int i=0; i > foo; + generalized_vector_of_vector > > aux(SIZE,SIZE); + for (int i=0; i mat(aux); + return 0;//&mat(coords[0].x(), coords[0].y()); +} +#endif + +#ifndef NOMTL +EIGEN_DONT_INLINE void setrand_mtl(const Coordinates& coords, const Values& vals); +#endif + diff --git a/thirdparty/eigen/bench/sparse_transpose.cpp b/thirdparty/eigen/bench/sparse_transpose.cpp new file mode 100644 index 000000000..c9aacf5f1 --- /dev/null +++ b/thirdparty/eigen/bench/sparse_transpose.cpp @@ -0,0 +1,104 @@ + +//g++ -O3 -g0 -DNDEBUG sparse_transpose.cpp -I.. 
-I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out +// -DNOGMM -DNOMTL +// -DCSPARSE -I /home/gael/Coding/LinearAlgebra/CSparse/Include/ /home/gael/Coding/LinearAlgebra/CSparse/Lib/libcsparse.a + +#ifndef SIZE +#define SIZE 10000 +#endif + +#ifndef DENSITY +#define DENSITY 0.01 +#endif + +#ifndef REPEAT +#define REPEAT 1 +#endif + +#include "BenchSparseUtil.h" + +#ifndef MINDENSITY +#define MINDENSITY 0.0004 +#endif + +#ifndef NBTRIES +#define NBTRIES 10 +#endif + +#define BENCH(X) \ + timer.reset(); \ + for (int _j=0; _j=MINDENSITY; density*=0.5) + { + fillMatrix(density, rows, cols, sm1); + + // dense matrices + #ifdef DENSEMATRIX + { + DenseMatrix m1(rows,cols), m3(rows,cols); + eiToDense(sm1, m1); + BENCH(for (int k=0; k EigenSparseTriMatrix; +typedef SparseMatrix EigenSparseTriMatrixRow; + +void fillMatrix(float density, int rows, int cols, EigenSparseTriMatrix& dst) +{ + dst.startFill(rows*cols*density); + for(int j = 0; j < cols; j++) + { + for(int i = 0; i < j; i++) + { + Scalar v = (internal::random(0,1) < density) ? 
internal::random() : 0; + if (v!=0) + dst.fill(i,j) = v; + } + dst.fill(j,j) = internal::random(); + } + dst.endFill(); +} + +int main(int argc, char *argv[]) +{ + int rows = SIZE; + int cols = SIZE; + float density = DENSITY; + BenchTimer timer; + #if 1 + EigenSparseTriMatrix sm1(rows,cols); + typedef Matrix DenseVector; + DenseVector b = DenseVector::Random(cols); + DenseVector x = DenseVector::Random(cols); + + bool densedone = false; + + for (float density = DENSITY; density>=MINDENSITY; density*=0.5) + { + EigenSparseTriMatrix sm1(rows, cols); + fillMatrix(density, rows, cols, sm1); + + // dense matrices + #ifdef DENSEMATRIX + if (!densedone) + { + densedone = true; + std::cout << "Eigen Dense\t" << density*100 << "%\n"; + DenseMatrix m1(rows,cols); + Matrix m2(rows,cols); + eiToDense(sm1, m1); + m2 = m1; + + BENCH(x = m1.marked().solveTriangular(b);) + std::cout << " colmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << x.transpose() << "\n"; + + BENCH(x = m2.marked().solveTriangular(b);) + std::cout << " rowmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << x.transpose() << "\n"; + } + #endif + + // eigen sparse matrices + { + std::cout << "Eigen sparse\t" << density*100 << "%\n"; + EigenSparseTriMatrixRow sm2 = sm1; + + BENCH(x = sm1.solveTriangular(b);) + std::cout << " colmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << x.transpose() << "\n"; + + BENCH(x = sm2.solveTriangular(b);) + std::cout << " rowmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << x.transpose() << "\n"; + +// x = b; +// BENCH(sm1.inverseProductInPlace(x);) +// std::cout << " colmajor^-1 * b:\t" << timer.value() << " (inplace)" << endl; +// std::cerr << x.transpose() << "\n"; +// +// x = b; +// BENCH(sm2.inverseProductInPlace(x);) +// std::cout << " rowmajor^-1 * b:\t" << timer.value() << " (inplace)" << endl; +// std::cerr << x.transpose() << "\n"; + } + + + + // CSparse + #ifdef CSPARSE + { + std::cout << "CSparse \t" << density*100 << "%\n"; + cs 
*m1; + eiToCSparse(sm1, m1); + + BENCH(x = b; if (!cs_lsolve (m1, x.data())){std::cerr << "cs_lsolve failed\n"; break;}; ) + std::cout << " colmajor^-1 * b:\t" << timer.value() << endl; + } + #endif + + // GMM++ + #ifndef NOGMM + { + std::cout << "GMM++ sparse\t" << density*100 << "%\n"; + GmmSparse m1(rows,cols); + gmm::csr_matrix m2; + eiToGmm(sm1, m1); + gmm::copy(m1,m2); + std::vector gmmX(cols), gmmB(cols); + Map >(&gmmX[0], cols) = x; + Map >(&gmmB[0], cols) = b; + + gmmX = gmmB; + BENCH(gmm::upper_tri_solve(m1, gmmX, false);) + std::cout << " colmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << Map >(&gmmX[0], cols).transpose() << "\n"; + + gmmX = gmmB; + BENCH(gmm::upper_tri_solve(m2, gmmX, false);) + timer.stop(); + std::cout << " rowmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << Map >(&gmmX[0], cols).transpose() << "\n"; + } + #endif + + // MTL4 + #ifndef NOMTL + { + std::cout << "MTL4\t" << density*100 << "%\n"; + MtlSparse m1(rows,cols); + MtlSparseRowMajor m2(rows,cols); + eiToMtl(sm1, m1); + m2 = m1; + mtl::dense_vector x(rows, 1.0); + mtl::dense_vector b(rows, 1.0); + + BENCH(x = mtl::upper_trisolve(m1,b);) + std::cout << " colmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << x << "\n"; + + BENCH(x = mtl::upper_trisolve(m2,b);) + std::cout << " rowmajor^-1 * b:\t" << timer.value() << endl; +// std::cerr << x << "\n"; + } + #endif + + + std::cout << "\n\n"; + } + #endif + + #if 0 + // bench small matrices (in-place versus return bye value) + { + timer.reset(); + for (int _j=0; _j<10; ++_j) { + Matrix4f m = Matrix4f::Random(); + Vector4f b = Vector4f::Random(); + Vector4f x = Vector4f::Random(); + timer.start(); + for (int _k=0; _k<1000000; ++_k) { + b = m.inverseProduct(b); + } + timer.stop(); + } + std::cout << "4x4 :\t" << timer.value() << endl; + } + + { + timer.reset(); + for (int _j=0; _j<10; ++_j) { + Matrix4f m = Matrix4f::Random(); + Vector4f b = Vector4f::Random(); + Vector4f x = Vector4f::Random(); + 
timer.start(); + for (int _k=0; _k<1000000; ++_k) { + m.inverseProductInPlace(x); + } + timer.stop(); + } + std::cout << "4x4 IP :\t" << timer.value() << endl; + } + #endif + + return 0; +} + diff --git a/thirdparty/eigen/bench/spbench/CMakeLists.txt b/thirdparty/eigen/bench/spbench/CMakeLists.txt new file mode 100644 index 000000000..8d53f4ae2 --- /dev/null +++ b/thirdparty/eigen/bench/spbench/CMakeLists.txt @@ -0,0 +1,78 @@ + + +set(BLAS_FOUND TRUE) +set(LAPACK_FOUND TRUE) +set(BLAS_LIBRARIES eigen_blas_static) +set(LAPACK_LIBRARIES eigen_lapack_static) + +set(SPARSE_LIBS "") + +# find_library(PARDISO_LIBRARIES pardiso412-GNU450-X86-64) +# if(PARDISO_LIBRARIES) +# add_definitions("-DEIGEN_PARDISO_SUPPORT") +# set(SPARSE_LIBS ${SPARSE_LIBS} ${PARDISO_LIBRARIES}) +# endif(PARDISO_LIBRARIES) + +find_package(Cholmod) +if(CHOLMOD_FOUND AND BLAS_FOUND AND LAPACK_FOUND) + add_definitions("-DEIGEN_CHOLMOD_SUPPORT") + include_directories(${CHOLMOD_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) +endif() + +find_package(Umfpack) +if(UMFPACK_FOUND AND BLAS_FOUND) + add_definitions("-DEIGEN_UMFPACK_SUPPORT") + include_directories(${UMFPACK_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES}) + set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES}) +endif() + +find_package(SuperLU 4.0) +if(SUPERLU_FOUND AND BLAS_FOUND) + add_definitions("-DEIGEN_SUPERLU_SUPPORT") + include_directories(${SUPERLU_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${BLAS_LIBRARIES}) + set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${BLAS_LIBRARIES}) +endif() + + +find_package(Pastix) +find_package(Scotch) +find_package(Metis) +if(PASTIX_FOUND AND BLAS_FOUND) + add_definitions("-DEIGEN_PASTIX_SUPPORT") + include_directories(${PASTIX_INCLUDES}) + if(SCOTCH_FOUND) + include_directories(${SCOTCH_INCLUDES}) + 
set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES}) + elseif(METIS_FOUND) + include_directories(${METIS_INCLUDES}) + set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES}) + endif(SCOTCH_FOUND) + set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES} ${ORDERING_LIBRARIES} ${BLAS_LIBRARIES}) + set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${BLAS_LIBRARIES}) +endif(PASTIX_FOUND AND BLAS_FOUND) + +if(METIS_FOUND) + include_directories(${METIS_INCLUDES}) + set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) + add_definitions("-DEIGEN_METIS_SUPPORT") +endif(METIS_FOUND) + +find_library(RT_LIBRARY rt) +if(RT_LIBRARY) + set(SPARSE_LIBS ${SPARSE_LIBS} ${RT_LIBRARY}) +endif(RT_LIBRARY) + +add_executable(spbenchsolver spbenchsolver.cpp) +target_link_libraries (spbenchsolver ${SPARSE_LIBS}) + +add_executable(spsolver sp_solver.cpp) +target_link_libraries (spsolver ${SPARSE_LIBS}) + + +add_executable(test_sparseLU test_sparseLU.cpp) +target_link_libraries (test_sparseLU ${SPARSE_LIBS}) + diff --git a/thirdparty/eigen/bench/spbench/sp_solver.cpp b/thirdparty/eigen/bench/spbench/sp_solver.cpp new file mode 100644 index 000000000..a1f4bac8a --- /dev/null +++ b/thirdparty/eigen/bench/spbench/sp_solver.cpp @@ -0,0 +1,125 @@ +// Small bench routine for Eigen available in Eigen +// (C) Desire NUENTSA WAKAM, INRIA + +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +// #include +#include +#include +using namespace std; +using namespace Eigen; + +int main(int argc, char **args) +{ + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + VectorXd b, x, tmp; + BenchTimer timer,totaltime; + //SparseLU > solver; +// SuperLU > solver; + ConjugateGradient, Lower,IncompleteCholesky > solver; + ifstream matrix_file; + string line; + int n; + // Set parameters +// solver.iparm(IPARM_THREAD_NBR) = 4; + /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate 
column-oriented format */ + if (argc < 2) assert(false && "please, give the matrix market file "); + + timer.start(); + totaltime.start(); + loadMarket(A, args[1]); + cout << "End charging matrix " << endl; + bool iscomplex=false, isvector=false; + int sym; + getMarketHeader(args[1], sym, iscomplex, isvector); + if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } + if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} + if (sym != 0) { // symmetric matrices, only the lower part is stored + SparseMatrix temp; + temp = A; + A = temp.selfadjointView(); + } + timer.stop(); + + n = A.cols(); + // ====== TESTS FOR SPARSE TUTORIAL ====== +// cout<< "OuterSize " << A.outerSize() << " inner " << A.innerSize() << endl; +// SparseMatrix mat1(A); +// SparseMatrix mat2; +// cout << " norm of A " << mat1.norm() << endl; ; +// PermutationMatrix perm(n); +// perm.resize(n,1); +// perm.indices().setLinSpaced(n, 0, n-1); +// mat2 = perm * mat1; +// mat.subrows(); +// mat2.resize(n,n); +// mat2.reserve(10); +// mat2.setConstant(); +// std::cout<< "NORM " << mat1.squaredNorm()<< endl; + + cout<< "Time to load the matrix " << timer.value() < 2) + loadMarketVector(b, args[2]); + else + { + b.resize(n); + tmp.resize(n); +// tmp.setRandom(); + for (int i = 0; i < n; i++) tmp(i) = i; + b = A * tmp ; + } +// Scaling > scal; +// scal.computeRef(A); +// b = scal.LeftScaling().cwiseProduct(b); + + /* Compute the factorization */ + cout<< "Starting the factorization "<< endl; + timer.reset(); + timer.start(); + cout<< "Size of Input Matrix "<< b.size()<<"\n\n"; + cout<< "Rows and columns "<< A.rows() <<" " < + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/thirdparty/eigen/bench/spbench/spbenchsolver.cpp b/thirdparty/eigen/bench/spbench/spbenchsolver.cpp new file mode 100644 index 000000000..4acd0039c --- /dev/null +++ b/thirdparty/eigen/bench/spbench/spbenchsolver.cpp @@ -0,0 +1,87 @@ +#include + 
+void bench_printhelp() +{ + cout<< " \nbenchsolver : performs a benchmark of all the solvers available in Eigen \n\n"; + cout<< " MATRIX FOLDER : \n"; + cout<< " The matrices for the benchmark should be collected in a folder specified with an environment variable EIGEN_MATRIXDIR \n"; + cout<< " The matrices are stored using the matrix market coordinate format \n"; + cout<< " The matrix and associated right-hand side (rhs) files are named respectively \n"; + cout<< " as MatrixName.mtx and MatrixName_b.mtx. If the rhs does not exist, a random one is generated. \n"; + cout<< " If a matrix is SPD, the matrix should be named as MatrixName_SPD.mtx \n"; + cout<< " If a true solution exists, it should be named as MatrixName_x.mtx; \n" ; + cout<< " it will be used to compute the norm of the error relative to the computed solutions\n\n"; + cout<< " OPTIONS : \n"; + cout<< " -h or --help \n print this help and return\n\n"; + cout<< " -d matrixdir \n Use matrixdir as the matrix folder instead of the one specified in the environment variable EIGEN_MATRIXDIR\n\n"; + cout<< " -o outputfile.xml \n Output the statistics to a xml file \n\n"; + cout<< " --eps Sets the relative tolerance for iterative solvers (default 1e-08) \n\n"; + cout<< " --maxits Sets the maximum number of iterations (default 1000) \n\n"; + +} +int main(int argc, char ** args) +{ + + bool help = ( get_options(argc, args, "-h") || get_options(argc, args, "--help") ); + if(help) { + bench_printhelp(); + return 0; + } + + // Get the location of the test matrices + string matrix_dir; + if (!get_options(argc, args, "-d", &matrix_dir)) + { + if(getenv("EIGEN_MATRIXDIR") == NULL){ + std::cerr << "Please, specify the location of the matrices with -d mat_folder or the environment variable EIGEN_MATRIXDIR \n"; + std::cerr << " Run with --help to see the list of all the available options \n"; + return -1; + } + matrix_dir = getenv("EIGEN_MATRIXDIR"); + } + + std::ofstream statbuf; + string statFile ; + + // Get the file to 
write the statistics + bool statFileExists = get_options(argc, args, "-o", &statFile); + if(statFileExists) + { + statbuf.open(statFile.c_str(), std::ios::out); + if(statbuf.good()){ + statFileExists = true; + printStatheader(statbuf); + statbuf.close(); + } + else + std::cerr << "Unable to open the provided file for writting... \n"; + } + + // Get the maximum number of iterations and the tolerance + int maxiters = 1000; + double tol = 1e-08; + string inval; + if (get_options(argc, args, "--eps", &inval)) + tol = atof(inval.c_str()); + if(get_options(argc, args, "--maxits", &inval)) + maxiters = atoi(inval.c_str()); + + string current_dir; + // Test the real-arithmetics matrices + Browse_Matrices(matrix_dir, statFileExists, statFile,maxiters, tol); + + // Test the complex-arithmetics matrices + Browse_Matrices >(matrix_dir, statFileExists, statFile, maxiters, tol); + + if(statFileExists) + { + statbuf.open(statFile.c_str(), std::ios::app); + statbuf << " \n"; + cout << "\n Output written in " << statFile << " ...\n"; + statbuf.close(); + } + + return 0; +} + + diff --git a/thirdparty/eigen/bench/spbench/spbenchsolver.h b/thirdparty/eigen/bench/spbench/spbenchsolver.h new file mode 100644 index 000000000..19c719c04 --- /dev/null +++ b/thirdparty/eigen/bench/spbench/spbenchsolver.h @@ -0,0 +1,554 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "spbenchstyle.h" + +#ifdef EIGEN_METIS_SUPPORT +#include +#endif + +#ifdef EIGEN_CHOLMOD_SUPPORT +#include +#endif + +#ifdef EIGEN_UMFPACK_SUPPORT +#include +#endif + +#ifdef EIGEN_PARDISO_SUPPORT +#include +#endif + +#ifdef EIGEN_SUPERLU_SUPPORT +#include +#endif + +#ifdef EIGEN_PASTIX_SUPPORT +#include +#endif + +// CONSTANTS +#define EIGEN_UMFPACK 10 +#define EIGEN_SUPERLU 20 +#define EIGEN_PASTIX 30 +#define EIGEN_PARDISO 40 +#define EIGEN_SPARSELU_COLAMD 50 +#define EIGEN_SPARSELU_METIS 51 +#define EIGEN_BICGSTAB 60 +#define EIGEN_BICGSTAB_ILUT 61 +#define EIGEN_GMRES 70 +#define EIGEN_GMRES_ILUT 71 +#define EIGEN_SIMPLICIAL_LDLT 80 +#define EIGEN_CHOLMOD_LDLT 90 +#define EIGEN_PASTIX_LDLT 100 +#define EIGEN_PARDISO_LDLT 110 +#define EIGEN_SIMPLICIAL_LLT 120 +#define EIGEN_CHOLMOD_SUPERNODAL_LLT 130 +#define EIGEN_CHOLMOD_SIMPLICIAL_LLT 140 +#define EIGEN_PASTIX_LLT 150 +#define EIGEN_PARDISO_LLT 160 +#define EIGEN_CG 170 +#define EIGEN_CG_PRECOND 180 + +using namespace Eigen; +using namespace std; + + +// Global variables for input parameters +int MaximumIters; // Maximum number of iterations +double RelErr; // Relative error of the computed solution +double best_time_val; // Current best time overall solvers +int best_time_id; // id of the best solver for the current system + +template inline typename NumTraits::Real test_precision() { return NumTraits::dummy_precision(); } +template<> inline float test_precision() { return 1e-3f; } +template<> inline double test_precision() { return 1e-6; } +template<> inline float test_precision >() { return test_precision(); } +template<> inline double test_precision >() { return test_precision(); } + +void printStatheader(std::ofstream& out) +{ + // Print XML header + // NOTE It would have been much easier to write these XML documents using external libraries like 
tinyXML or Xerces-C++. + + out << " \n"; + out << " \n"; + out << "\n]>"; + out << "\n\n\n"; + + out << "\n \n" ; //root XML element + // Print the xsl style section + printBenchStyle(out); + // List all available solvers + out << " \n"; +#ifdef EIGEN_UMFPACK_SUPPORT + out <<" \n"; + out << " LU \n"; + out << " UMFPACK \n"; + out << " \n"; +#endif +#ifdef EIGEN_SUPERLU_SUPPORT + out <<" \n"; + out << " LU \n"; + out << " SUPERLU \n"; + out << " \n"; +#endif +#ifdef EIGEN_CHOLMOD_SUPPORT + out <<" \n"; + out << " LLT SP \n"; + out << " CHOLMOD \n"; + out << " \n"; + + out <<" \n"; + out << " LLT \n"; + out << " CHOLMOD \n"; + out << " \n"; + + out <<" \n"; + out << " LDLT \n"; + out << " CHOLMOD \n"; + out << " \n"; +#endif +#ifdef EIGEN_PARDISO_SUPPORT + out <<" \n"; + out << " LU \n"; + out << " PARDISO \n"; + out << " \n"; + + out <<" \n"; + out << " LLT \n"; + out << " PARDISO \n"; + out << " \n"; + + out <<" \n"; + out << " LDLT \n"; + out << " PARDISO \n"; + out << " \n"; +#endif +#ifdef EIGEN_PASTIX_SUPPORT + out <<" \n"; + out << " LU \n"; + out << " PASTIX \n"; + out << " \n"; + + out <<" \n"; + out << " LLT \n"; + out << " PASTIX \n"; + out << " \n"; + + out <<" \n"; + out << " LDLT \n"; + out << " PASTIX \n"; + out << " \n"; +#endif + + out <<" \n"; + out << " BICGSTAB \n"; + out << " EIGEN \n"; + out << " \n"; + + out <<" \n"; + out << " BICGSTAB_ILUT \n"; + out << " EIGEN \n"; + out << " \n"; + + out <<" \n"; + out << " GMRES_ILUT \n"; + out << " EIGEN \n"; + out << " \n"; + + out <<" \n"; + out << " LDLT \n"; + out << " EIGEN \n"; + out << " \n"; + + out <<" \n"; + out << " LLT \n"; + out << " EIGEN \n"; + out << " \n"; + + out <<" \n"; + out << " CG \n"; + out << " EIGEN \n"; + out << " \n"; + + out <<" \n"; + out << " LU_COLAMD \n"; + out << " EIGEN \n"; + out << " \n"; + +#ifdef EIGEN_METIS_SUPPORT + out <<" \n"; + out << " LU_METIS \n"; + out << " EIGEN \n"; + out << " \n"; +#endif + out << " \n"; + +} + + +template +void call_solver(Solver 
&solver, const int solver_id, const typename Solver::MatrixType& A, const Matrix& b, const Matrix& refX,std::ofstream& statbuf) +{ + + double total_time; + double compute_time; + double solve_time; + double rel_error; + Matrix x; + BenchTimer timer; + timer.reset(); + timer.start(); + solver.compute(A); + if (solver.info() != Success) + { + std::cerr << "Solver failed ... \n"; + return; + } + timer.stop(); + compute_time = timer.value(); + statbuf << "


+Prints the dimensions of the most generic object present in %Eigen. It could be any matrix expression, any dense or sparse matrix, or any array. + + + +
Example:Output:
+\include function_taking_eigenbase.cpp + +\verbinclude function_taking_eigenbase.out +
+ %DenseBase Example

+Prints a sub-block of the dense expression. Accepts any dense matrix or array expression, but no sparse objects and no special matrix classes such as DiagonalMatrix. +\code +template +void print_block(const DenseBase& b, int x, int y, int r, int c) +{ + std::cout << "block: " << b.block(x,y,r,c) << std::endl; +} +\endcode + %ArrayBase Example

+Prints the maximum coefficient of the array or array-expression. +\code +template +void print_max_coeff(const ArrayBase &a) +{ + std::cout << "max: " << a.maxCoeff() << std::endl; +} +\endcode + %MatrixBase Example

+Prints the inverse condition number of the given matrix or matrix-expression. +\code +template +void print_inv_cond(const MatrixBase& a) +{ + const typename JacobiSVD::SingularValuesType& + sing_vals = a.jacobiSvd().singularValues(); + std::cout << "inv cond: " << sing_vals(sing_vals.size()-1) / sing_vals(0) << std::endl; +} +\endcode + Multiple templated arguments example

+Calculate the squared Euclidean distance between two points. +\code +template +typename DerivedA::Scalar squaredist(const MatrixBase& p1,const MatrixBase& p2) +{ + return (p1-p2).squaredNorm(); +} +\endcode +Notice that we used two template parameters, one per argument. This permits the function to handle inputs of different types, e.g., +\code +squaredist(v1,2*v2) +\endcode +where the first argument \c v1 is a vector and the second argument \c 2*v2 is an expression. +

+ +These examples are just intended to give the reader a first impression of how functions can be written which take a plain and constant Matrix or Array argument. They are also intended to give the reader an idea about the most common base classes being the optimal candidates for functions. In the next section we will look in more detail at an example and the different ways it can be implemented, while discussing each implementation's problems and advantages. For the discussion below, Matrix and Array as well as MatrixBase and ArrayBase can be exchanged and all arguments still hold. + + +\section TopicUsingRefClass How to write generic, but non-templated functions? + +In all the previous examples, the functions had to be template functions. This approach allows writing very generic code, but it is often desirable to write non-templated functions and still keep some level of genericity to avoid needless copies of the arguments. The typical example is to write functions accepting either a MatrixXf or a block of a MatrixXf. This is exactly the purpose of the Ref class. Here is a simple example: + + + + +
Example:Output:
+\include function_taking_ref.cpp + +\verbinclude function_taking_ref.out +
+In the first two calls to inv_cond, no copy occurs because the memory layout of the arguments matches the memory layout accepted by Ref. However, in the last call, we have a generic expression that will be automatically evaluated into a temporary MatrixXf by the Ref<> object. + +A Ref object can also be writable. Here is an example of a function computing the covariance matrix of two input matrices where each row is an observation: +\code +void cov(const Ref x, const Ref y, Ref C) +{ + const float num_observations = static_cast(x.rows()); + const RowVectorXf x_mean = x.colwise().sum() / num_observations; + const RowVectorXf y_mean = y.colwise().sum() / num_observations; + C = (x.rowwise() - x_mean).transpose() * (y.rowwise() - y_mean) / num_observations; +} +\endcode +and here are two examples calling cov without any copy: +\code +MatrixXf m1, m2, m3 +cov(m1, m2, m3); +cov(m1.leftCols<3>(), m2.leftCols<3>(), m3.topLeftCorner<3,3>()); +\endcode +The Ref<> class has two other optional template arguments that control the kind of memory layout that can be accepted without any copy. See the class Ref documentation for the details. + +\section TopicPlainFunctionsWorking In which cases do functions taking plain Matrix or Array arguments work? + +Without using template functions, and without the Ref class, a naive implementation of the previous cov function might look like this +\code +MatrixXf cov(const MatrixXf& x, const MatrixXf& y) +{ + const float num_observations = static_cast(x.rows()); + const RowVectorXf x_mean = x.colwise().sum() / num_observations; + const RowVectorXf y_mean = y.colwise().sum() / num_observations; + return (x.rowwise() - x_mean).transpose() * (y.rowwise() - y_mean) / num_observations; +} +\endcode +and contrary to what one might think at first, this implementation is fine unless you require a generic implementation that works with double matrices too, or you care about avoiding temporary objects. Why is that the case? 
Where are temporaries involved? How can code as given below compile? +\code +MatrixXf x,y,z; +MatrixXf C = cov(x,y+z); +\endcode +In this special case, the example is fine and works because both parameters are declared as \e const references. The compiler creates a temporary and evaluates the expression y+z into this temporary. Once the function is processed, the temporary is released and the result is assigned to C. + +\b Note: Functions taking \e const references to Matrix (or Array) can process expressions at the cost of temporaries. + + +\section TopicPlainFunctionsFailing In which cases do functions taking a plain Matrix or Array argument fail? + +Here, we consider a slightly modified version of the function given above. This time, we do not want to return the result but pass an additional non-const parameter which allows us to store the result. A first naive implementation might look as follows. +\code +// Note: This code is flawed! +void cov(const MatrixXf& x, const MatrixXf& y, MatrixXf& C) +{ + const float num_observations = static_cast(x.rows()); + const RowVectorXf x_mean = x.colwise().sum() / num_observations; + const RowVectorXf y_mean = y.colwise().sum() / num_observations; + C = (x.rowwise() - x_mean).transpose() * (y.rowwise() - y_mean) / num_observations; +} +\endcode +When trying to execute the following code +\code +MatrixXf C = MatrixXf::Zero(3,6); +cov(x,y, C.block(0,0,3,3)); +\endcode +the compiler will fail, because it is not possible to convert the expression returned by \c MatrixXf::block() into a non-const \c MatrixXf&. This is the case because the compiler wants to protect you from writing your result to a temporary object. In this special case this protection is not intended -- we want to write to a temporary object. So how can we overcome this problem? + +The solution which is preferred at the moment is based on a little \em hack. 
One needs to pass a const reference to the matrix and internally the constness needs to be cast away. The correct implementation for C++98 compliant compilers would be +\code +template +void cov(const MatrixBase& x, const MatrixBase& y, MatrixBase const & C) +{ + typedef typename Derived::Scalar Scalar; + typedef typename internal::plain_row_type::type RowVectorType; + + const Scalar num_observations = static_cast(x.rows()); + + const RowVectorType x_mean = x.colwise().sum() / num_observations; + const RowVectorType y_mean = y.colwise().sum() / num_observations; + + const_cast< MatrixBase& >(C) = + (x.rowwise() - x_mean).transpose() * (y.rowwise() - y_mean) / num_observations; +} +\endcode +The implementation above now not only works with temporary expressions but also allows the function to be used with matrices of arbitrary floating point scalar types. + +\b Note: The const cast hack will only work with templated functions. It will not work with the MatrixXf implementation because it is not possible to cast a Block expression to a Matrix reference! + + + +\section TopicResizingInGenericImplementations How to resize matrices in generic implementations? + +One might think we are done now, right? This is not completely true because in order for our covariance function to be generically applicable, we want the following code to work +\code +MatrixXf x = MatrixXf::Random(100,3); +MatrixXf y = MatrixXf::Random(100,3); +MatrixXf C; +cov(x, y, C); +\endcode +This is not the case anymore, when we are using an implementation taking MatrixBase as a parameter. In general, %Eigen supports automatic resizing but it is not possible to do so on expressions. Why should resizing of a matrix Block be allowed? It is a reference to a sub-matrix and we definitely don't want to resize that. So how can we incorporate resizing if we cannot resize on MatrixBase? The solution is to resize the derived object as in this implementation. 
+\code +template +void cov(const MatrixBase& x, const MatrixBase& y, MatrixBase const & C_) +{ + typedef typename Derived::Scalar Scalar; + typedef typename internal::plain_row_type::type RowVectorType; + + const Scalar num_observations = static_cast(x.rows()); + + const RowVectorType x_mean = x.colwise().sum() / num_observations; + const RowVectorType y_mean = y.colwise().sum() / num_observations; + + MatrixBase& C = const_cast< MatrixBase& >(C_); + + C.derived().resize(x.cols(),x.cols()); // resize the derived object + C = (x.rowwise() - x_mean).transpose() * (y.rowwise() - y_mean) / num_observations; +} +\endcode +This implementation is now working for parameters being expressions and for parameters being matrices and having the wrong size. Resizing the expressions does not do any harm in this case unless they actually require resizing. That means, passing an expression with the wrong dimensions will result in a run-time error (in debug mode only) while passing expressions of the correct size will just work fine. + +\b Note: In the above discussion the terms Matrix and Array and MatrixBase and ArrayBase can be exchanged and all arguments still hold. + +\section TopicSummary Summary + + - To summarize, the implementation of functions taking non-writable (const referenced) objects is not a big issue and does not lead to problematic situations in terms of compiling and running your program. However, a naive implementation is likely to introduce unnecessary temporary objects in your code. In order to avoid evaluating parameters into temporaries, pass them as (const) references to MatrixBase or ArrayBase (so templatize your function). + + - Functions taking writable (non-const) parameters must take const references and cast away constness within the function body. 
+ + - Functions that take as parameters MatrixBase (or ArrayBase) objects, and potentially need to resize them (in the case where they are resizable), must call resize() on the derived class, as returned by derived(). +*/ +} diff --git a/thirdparty/eigen/doc/HiPerformance.dox b/thirdparty/eigen/doc/HiPerformance.dox new file mode 100644 index 000000000..ab6cdfd44 --- /dev/null +++ b/thirdparty/eigen/doc/HiPerformance.dox @@ -0,0 +1,128 @@ + +namespace Eigen { + +/** \page TopicWritingEfficientProductExpression Writing efficient matrix product expressions + +In general achieving good performance with Eigen does not require any special effort: +simply write your expressions in the most high level way. This is especially true +for small fixed size matrices. For large matrices, however, it might be useful to +take some care when writing your expressions in order to minimize useless evaluations +and optimize the performance. +In this page we will give a brief overview of Eigen's internal mechanism to simplify +and evaluate complex product expressions, and discuss the current limitations. +In particular we will focus on expressions matching level 2 and 3 BLAS routines, i.e, +all kinds of matrix products and triangular solvers. + +Indeed, in Eigen we have implemented a set of highly optimized routines which are very similar +to BLAS's ones. Unlike BLAS, those routines are made available to the user via a high level and +natural API. Each of these routines can compute in a single evaluation a wide variety of expressions. +Given an expression, the challenge is then to map it to a minimal set of routines. +As explained later, this mechanism has some limitations, and knowing them will allow +you to write faster code by making your expressions more Eigen friendly. + +\section GEMM General Matrix-Matrix product (GEMM) + +Let's start with the most common primitive: the matrix product of general dense matrices. +In the BLAS world this corresponds to the GEMM routine. 
Our equivalent primitive can +perform the following operation: +\f$ C.noalias() += \alpha op1(A) op2(B) \f$ +where A, B, and C are column and/or row major matrices (or sub-matrices), +alpha is a scalar value, and op1, op2 can be transpose, adjoint, conjugate, or the identity. +When Eigen detects a matrix product, it analyzes both sides of the product to extract a +unique scalar factor alpha, and for each side, its effective storage order, shape, and conjugation states. +More precisely each side is simplified by iteratively removing trivial expressions such as scalar multiple, +negation and conjugation. Transpose and Block expressions are not evaluated and they only modify the storage order +and shape. All other expressions are immediately evaluated. +For instance, the following expression: +\code m1.noalias() -= s4 * (s1 * m2.adjoint() * (-(s3*m3).conjugate()*s2)) \endcode +is automatically simplified to: +\code m1.noalias() += (s1*s2*conj(s3)*s4) * m2.adjoint() * m3.conjugate() \endcode +which exactly matches our GEMM routine. + +\subsection GEMM_Limitations Limitations +Unfortunately, this simplification mechanism is not perfect yet and not all expressions which could be +handled by a single GEMM-like call are correctly detected. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Not optimal expressionEvaluated asOptimal version (single evaluation)Comments
\code +m1 += m2 * m3; \endcode\code +temp = m2 * m3; +m1 += temp; \endcode\code +m1.noalias() += m2 * m3; \endcodeUse .noalias() to tell Eigen the result and right-hand-sides do not alias. + Otherwise the product m2 * m3 is evaluated into a temporary.
\code +m1.noalias() += s1 * (m2 * m3); \endcodeThis is a special feature of Eigen. Here the product between a scalar + and a matrix product does not evaluate the matrix product but instead it + returns a matrix product expression tracking the scalar scaling factor.
+ Without this optimization, the matrix product would be evaluated into a + temporary as in the next example.
\code +m1.noalias() += (m2 * m3).adjoint(); \endcode\code +temp = m2 * m3; +m1 += temp.adjoint(); \endcode\code +m1.noalias() += m3.adjoint() +* * m2.adjoint(); \endcodeThis is because the product expression has the EvalBeforeNesting bit which + enforces the evaluation of the product by the Transpose expression.
\code +m1 = m1 + m2 * m3; \endcode\code +temp = m2 * m3; +m1 = m1 + temp; \endcode\code m1.noalias() += m2 * m3; \endcodeHere there is no way to detect at compile time that the two m1 are the same, + and so the matrix product will be immediately evaluated.
\code +m1.noalias() = m4 + m2 * m3; \endcode\code +temp = m2 * m3; +m1 = m4 + temp; \endcode\code +m1 = m4; +m1.noalias() += m2 * m3; \endcodeFirst of all, here the .noalias() in the first expression is useless because + m2*m3 will be evaluated anyway. However, note how this expression can be rewritten + so that no temporary is required. (tip: for very small fixed size matrices + it is slightly better to rewrite it like this: m1.noalias() = m2 * m3; m1 += m4;)
\code +m1.noalias() += (s1*m2).block(..) * m3; \endcode\code +temp = (s1*m2).block(..); +m1 += temp * m3; \endcode\code +m1.noalias() += s1 * m2.block(..) * m3; \endcodeThis is because our expression analyzer is currently not able to extract trivial + expressions nested in a Block expression. Therefore the nested scalar + multiple cannot be properly extracted.
+ +Of course all these remarks hold for all other kinds of products involving triangular or selfadjoint matrices. + +*/ + +} diff --git a/thirdparty/eigen/doc/InplaceDecomposition.dox b/thirdparty/eigen/doc/InplaceDecomposition.dox new file mode 100644 index 000000000..cb1c6d413 --- /dev/null +++ b/thirdparty/eigen/doc/InplaceDecomposition.dox @@ -0,0 +1,115 @@ +namespace Eigen { + +/** \eigenManualPage InplaceDecomposition Inplace matrix decompositions + +Starting from %Eigen 3.3, the LU, Cholesky, and QR decompositions can operate \em inplace, that is, directly within the given input matrix. +This feature is especially useful when dealing with huge matrices, and/or when the available memory is very limited (embedded systems). + +To this end, the respective decomposition class must be instantiated with a Ref<> matrix type, and the decomposition object must be constructed with the input matrix as argument. As an example, let us consider an inplace LU decomposition with partial pivoting. + +Let's start with the basic inclusions, and declaration of a 2x2 matrix \c A: + + + + + + + +
codeoutput
\snippet TutorialInplaceLU.cpp init + \snippet TutorialInplaceLU.out init +
+ +No surprise here! Then, let's declare our inplace LU object \c lu, and check the content of the matrix \c A: + + + + + + +
\snippet TutorialInplaceLU.cpp declaration + \snippet TutorialInplaceLU.out declaration +
+ +Here, the \c lu object computes and stores the \c L and \c U factors within the memory held by the matrix \c A. +The coefficients of \c A have thus been destroyed during the factorization, and replaced by the L and U factors as one can verify: + + + + + + +
\snippet TutorialInplaceLU.cpp matrixLU + \snippet TutorialInplaceLU.out matrixLU +
+ +Then, one can use the \c lu object as usual, for instance to solve the Ax=b problem: + + + + + +
\snippet TutorialInplaceLU.cpp solve + \snippet TutorialInplaceLU.out solve +
+ +Here, since the content of the original matrix \c A has been lost, we had to declare a new matrix \c A0 to verify the result. + +Since the memory is shared between \c A and \c lu, modifying the matrix \c A will make \c lu invalid. +This can easily be verified by modifying the content of \c A and trying to solve the initial problem again: + + + + + + +
\snippet TutorialInplaceLU.cpp modifyA + \snippet TutorialInplaceLU.out modifyA +
+ +Note that there is no shared pointer under the hood; it is the \b responsibility \b of \b the \b user to keep the input matrix \c A alive for as long as \c lu is in use. + +If one wants to update the factorization with the modified A, one has to call the compute method as usual: + + + + + +
\snippet TutorialInplaceLU.cpp recompute + \snippet TutorialInplaceLU.out recompute +
+ +Note that calling compute does not change the memory which is referenced by the \c lu object. Therefore, if the compute method is called with another matrix \c A1 different than \c A, then the content of \c A1 won't be modified. This is still the content of \c A that will be used to store the L and U factors of the matrix \c A1. +This can easily be verified as follows: + + + + + +
\snippet TutorialInplaceLU.cpp recompute_bis0 + \snippet TutorialInplaceLU.out recompute_bis0 +
+The matrix \c A1 is unchanged, and one can thus solve A1*x=b, and directly check the residual without any copy of \c A1: + + + + + +
\snippet TutorialInplaceLU.cpp recompute_bis1 + \snippet TutorialInplaceLU.out recompute_bis1 +
+ + +Here is the list of matrix decompositions supporting this inplace mechanism: + +- class LLT +- class LDLT +- class PartialPivLU +- class FullPivLU +- class HouseholderQR +- class ColPivHouseholderQR +- class FullPivHouseholderQR +- class CompleteOrthogonalDecomposition + +*/ + +} \ No newline at end of file diff --git a/thirdparty/eigen/doc/InsideEigenExample.dox b/thirdparty/eigen/doc/InsideEigenExample.dox new file mode 100644 index 000000000..ed053c69d --- /dev/null +++ b/thirdparty/eigen/doc/InsideEigenExample.dox @@ -0,0 +1,495 @@ +namespace Eigen { + +/** \page TopicInsideEigenExample What happens inside Eigen, on a simple example + +\eigenAutoToc + +
+ + +Consider the following example program: + +\code +#include + +int main() +{ + int size = 50; + // VectorXf is a vector of floats, with dynamic size. + Eigen::VectorXf u(size), v(size), w(size); + u = v + w; +} +\endcode + +The goal of this page is to understand how Eigen compiles it, assuming that SSE2 vectorization is enabled (GCC option -msse2). + +\section WhyInteresting Why it's interesting + +Maybe you think, that the above example program is so simple, that compiling it shouldn't involve anything interesting. So before starting, let us explain what is nontrivial in compiling it correctly -- that is, producing optimized code -- so that the complexity of Eigen, that we'll explain here, is really useful. + +Look at the line of code +\code + u = v + w; // (*) +\endcode + +The first important thing about compiling it, is that the arrays should be traversed only once, like +\code + for(int i = 0; i < size; i++) u[i] = v[i] + w[i]; +\endcode +The problem is that if we make a naive C++ library where the VectorXf class has an operator+ returning a VectorXf, then the line of code (*) will amount to: +\code + VectorXf tmp = v + w; + VectorXf u = tmp; +\endcode +Obviously, the introduction of the temporary \a tmp here is useless. It has a very bad effect on performance, first because the creation of \a tmp requires a dynamic memory allocation in this context, and second as there are now two for loops: +\code + for(int i = 0; i < size; i++) tmp[i] = v[i] + w[i]; + for(int i = 0; i < size; i++) u[i] = tmp[i]; +\endcode +Traversing the arrays twice instead of once is terrible for performance, as it means that we do many redundant memory accesses. + +The second important thing about compiling the above program, is to make correct use of SSE2 instructions. Notice that Eigen also supports AltiVec and that all the discussion that we make here applies also to AltiVec. 
+ +SSE2, like AltiVec, is a set of instructions allowing to perform computations on packets of 128 bits at once. Since a float is 32 bits, this means that SSE2 instructions can handle 4 floats at once. This means that, if correctly used, they can make our computation go up to 4x faster. + +However, in the above program, we have chosen size=50, so our vectors consist of 50 float's, and 50 is not a multiple of 4. This means that we cannot hope to do all of that computation using SSE2 instructions. The second best thing, to which we should aim, is to handle the 48 first coefficients with SSE2 instructions, since 48 is the biggest multiple of 4 below 50, and then handle separately, without SSE2, the 49th and 50th coefficients. Something like this: + +\code + for(int i = 0; i < 4*(size/4); i+=4) u.packet(i) = v.packet(i) + w.packet(i); + for(int i = 4*(size/4); i < size; i++) u[i] = v[i] + w[i]; +\endcode + +So let us look line by line at our example program, and let's follow Eigen as it compiles it. + +\section ConstructingVectors Constructing vectors + +Let's analyze the first line: + +\code + Eigen::VectorXf u(size), v(size), w(size); +\endcode + +First of all, VectorXf is the following typedef: +\code + typedef Matrix VectorXf; +\endcode + +The class template Matrix is declared in src/Core/util/ForwardDeclarations.h with 6 template parameters, but the last 3 are automatically determined by the first 3. So you don't need to worry about them for now. Here, Matrix\ means a matrix of floats, with a dynamic number of rows and 1 column. + +The Matrix class inherits a base class, MatrixBase. Don't worry about it, for now it suffices to say that MatrixBase is what unifies matrices/vectors and all the expressions types -- more on that below. + +When we do +\code + Eigen::VectorXf u(size); +\endcode +the constructor that is called is Matrix::Matrix(int), in src/Core/Matrix.h. 
Besides some assertions, all it does is to construct the \a m_storage member, which is of type DenseStorage\. + +You may wonder, isn't it overengineering to have the storage in a separate class? The reason is that the Matrix class template covers all kinds of matrices and vector: both fixed-size and dynamic-size. The storage method is not the same in these two cases. For fixed-size, the matrix coefficients are stored as a plain member array. For dynamic-size, the coefficients will be stored as a pointer to a dynamically-allocated array. Because of this, we need to abstract storage away from the Matrix class. That's DenseStorage. + +Let's look at this constructor, in src/Core/DenseStorage.h. You can see that there are many partial template specializations of DenseStorages here, treating separately the cases where dimensions are Dynamic or fixed at compile-time. The partial specialization that we are looking at is: +\code +template class DenseStorage +\endcode + +Here, the constructor called is DenseStorage::DenseStorage(int size, int rows, int columns) +with size=50, rows=50, columns=1. + +Here is this constructor: +\code +inline DenseStorage(int size, int rows, int) : m_data(internal::aligned_new(size)), m_rows(rows) {} +\endcode + +Here, the \a m_data member is the actual array of coefficients of the matrix. As you see, it is dynamically allocated. Rather than calling new[] or malloc(), as you can see, we have our own internal::aligned_new defined in src/Core/util/Memory.h. What it does is that if vectorization is enabled, then it uses a platform-specific call to allocate a 128-bit-aligned array, as that is very useful for vectorization with both SSE2 and AltiVec. If vectorization is disabled, it amounts to the standard new[]. + +As you can see, the constructor also sets the \a m_rows member to \a size. 
Notice that there is no \a m_columns member: indeed, in this partial specialization of DenseStorage, we know the number of columns at compile-time, since the _Cols template parameter is different from Dynamic. Namely, in our case, _Cols is 1, which is to say that our vector is just a matrix with 1 column. Hence, there is no need to store the number of columns as a runtime variable. + +When you call VectorXf::data() to get the pointer to the array of coefficients, it returns DenseStorage::data() which returns the \a m_data member. + +When you call VectorXf::size() to get the size of the vector, this is actually a method in the base class MatrixBase. It determines that the vector is a column-vector, since ColsAtCompileTime==1 (this comes from the template parameters in the typedef VectorXf). It deduces that the size is the number of rows, so it returns VectorXf::rows(), which returns DenseStorage::rows(), which returns the \a m_rows member, which was set to \a size by the constructor. + +\section ConstructionOfSumXpr Construction of the sum expression + +Now that our vectors are constructed, let's move on to the next line: + +\code +u = v + w; +\endcode + +The executive summary is that operator+ returns a "sum of vectors" expression, but doesn't actually perform the computation. It is the operator=, whose call occurs thereafter, that does the computation. + +Let us now see what Eigen does when it sees this: + +\code +v + w +\endcode + +Here, v and w are of type VectorXf, which is a typedef for a specialization of Matrix (as we explained above), which is a subclass of MatrixBase. So what is being called is + +\code +MatrixBase::operator+(const MatrixBase&) +\endcode + +The return type of this operator is +\code +CwiseBinaryOp, VectorXf, VectorXf> +\endcode +The CwiseBinaryOp class is our first encounter with an expression template. As we said, the operator+ doesn't by itself perform any computation, it just returns an abstract "sum of vectors" expression. 
Since there are also "difference of vectors" and "coefficient-wise product of vectors" expressions, we unify them all as "coefficient-wise binary operations", which we abbreviate as "CwiseBinaryOp". "Coefficient-wise" means that the operations is performed coefficient by coefficient. "binary" means that there are two operands -- we are adding two vectors with one another. + +Now you might ask, what if we did something like + +\code +v + w + u; +\endcode + +The first v + w would return a CwiseBinaryOp as above, so in order for this to compile, we'd need to define an operator+ also in the class CwiseBinaryOp... at this point it starts looking like a nightmare: are we going to have to define all operators in each of the expression classes (as you guessed, CwiseBinaryOp is only one of many) ? This looks like a dead end! + +The solution is that CwiseBinaryOp itself, as well as Matrix and all the other expression types, is a subclass of MatrixBase. So it is enough to define once and for all the operators in class MatrixBase. + +Since MatrixBase is the common base class of different subclasses, the aspects that depend on the subclass must be abstracted from MatrixBase. This is called polymorphism. + +The classical approach to polymorphism in C++ is by means of virtual functions. This is dynamic polymorphism. Here we don't want dynamic polymorphism because the whole design of Eigen is based around the assumption that all the complexity, all the abstraction, gets resolved at compile-time. This is crucial: if the abstraction can't get resolved at compile-time, Eigen's compile-time optimization mechanisms become useless, not to mention that if that abstraction has to be resolved at runtime it'll incur an overhead by itself. 
+ +Here, what we want is to have a single class MatrixBase as the base of many subclasses, in such a way that each MatrixBase object (be it a matrix, or vector, or any kind of expression) knows at compile-time (as opposed to run-time) of which particular subclass it is an object (i.e. whether it is a matrix, or an expression, and what kind of expression). + +The solution is the Curiously Recurring Template Pattern. Let's do the break now. Hopefully you can read this wikipedia page during the break if needed, but it won't be allowed during the exam. + +In short, MatrixBase takes a template parameter \a Derived. Whenever we define a subclass Subclass, we actually make Subclass inherit MatrixBase\. The point is that different subclasses inherit different MatrixBase types. Thanks to this, whenever we have an object of a subclass, and we call on it some MatrixBase method, we still remember even from inside the MatrixBase method which particular subclass we're talking about. + +This means that we can put almost all the methods and operators in the base class MatrixBase, and have only the bare minimum in the subclasses. If you look at the subclasses in Eigen, like for instance the CwiseBinaryOp class, they have very few methods. There are coeff() and sometimes coeffRef() methods for access to the coefficients, there are rows() and cols() methods returning the number of rows and columns, but there isn't much more than that. All the meat is in MatrixBase, so it only needs to be coded once for all kinds of expressions, matrices, and vectors. + +So let's end this digression and come back to the piece of code from our example program that we were currently analyzing, + +\code +v + w +\endcode + +Now that MatrixBase is a good friend, let's write fully the prototype of the operator+ that gets called here (this code is from src/Core/MatrixBase.h): + +\code +template +class MatrixBase +{ + // ... 
+ + template<typename OtherDerived> + const CwiseBinaryOp<internal::scalar_sum_op<typename internal::traits<Derived>::Scalar>, Derived, OtherDerived> + operator+(const MatrixBase<OtherDerived> &other) const; + + // ... +}; +\endcode + +Here of course, \a Derived and \a OtherDerived are VectorXf. + +As we said, CwiseBinaryOp is also used for other operations such as subtraction, so it takes another template parameter determining the operation that will be applied to coefficients. This template parameter is a functor, that is, a class in which we have an operator() so it behaves like a function. Here, the functor used is internal::scalar_sum_op. It is defined in src/Core/Functors.h. + +Let us now explain the internal::traits here. The internal::scalar_sum_op class takes one template parameter: the type of the numbers to handle. Here of course we want to pass the scalar type (a.k.a. numeric type) of VectorXf, which is \c float. How do we determine which is the scalar type of \a Derived ? Throughout Eigen, all matrix and expression types define a typedef \a Scalar which gives its scalar type. For example, VectorXf::Scalar is a typedef for \c float. So here, if life was easy, we could find the numeric type of \a Derived as just +\code +typename Derived::Scalar +\endcode +Unfortunately, we can't do that here, as the compiler would complain that the type Derived hasn't yet been defined. So we use a workaround: in src/Core/util/ForwardDeclarations.h, we declared (not defined!) all our subclasses, like Matrix, and we also declared the following class template: +\code +template<typename T> struct internal::traits; +\endcode +In src/Core/Matrix.h, right \em before the definition of class Matrix, we define a partial specialization of internal::traits for T=Matrix\<any template parameters\>. In this specialization of internal::traits, we define the Scalar typedef. So when we actually define Matrix, it is legal to refer to "typename internal::traits\<Matrix\>::Scalar". + +Anyway, we have declared our operator+.
In our case, where \a Derived and \a OtherDerived are VectorXf, the above declaration amounts to: +\code +class MatrixBase +{ + // ... + + const CwiseBinaryOp, VectorXf, VectorXf> + operator+(const MatrixBase &other) const; + + // ... +}; +\endcode + +Let's now jump to src/Core/CwiseBinaryOp.h to see how it is defined. As you can see there, all it does is to return a CwiseBinaryOp object, and this object is just storing references to the left-hand-side and right-hand-side expressions -- here, these are the vectors \a v and \a w. Well, the CwiseBinaryOp object is also storing an instance of the (empty) functor class, but you shouldn't worry about it as that is a minor implementation detail. + +Thus, the operator+ hasn't performed any actual computation. To summarize, the operation \a v + \a w just returned an object of type CwiseBinaryOp which did nothing else than just storing references to \a v and \a w. + +\section Assignment The assignment + +At this point, the expression \a v + \a w has finished evaluating, so, in the process of compiling the line of code +\code +u = v + w; +\endcode +we now enter the operator=. + +What operator= is being called here? The vector u is an object of class VectorXf, i.e. Matrix. In src/Core/Matrix.h, inside the definition of class Matrix, we see this: +\code + template + inline Matrix& operator=(const MatrixBase& other) + { + eigen_assert(m_storage.data()!=0 && "you cannot use operator= with a non initialized matrix (instead use set()"); + return Base::operator=(other.derived()); + } +\endcode +Here, Base is a typedef for MatrixBase\. So, what is being called is the operator= of MatrixBase. Let's see its prototype in src/Core/MatrixBase.h: +\code + template + Derived& operator=(const MatrixBase& other); +\endcode +Here, \a Derived is VectorXf (since u is a VectorXf) and \a OtherDerived is CwiseBinaryOp. 
More specifically, as explained in the previous section, \a OtherDerived is: +\code +CwiseBinaryOp, VectorXf, VectorXf> +\endcode +So the full prototype of the operator= being called is: +\code +VectorXf& MatrixBase::operator=(const MatrixBase, VectorXf, VectorXf> > & other); +\endcode +This operator= literally reads "copying a sum of two VectorXf's into another VectorXf". + +Let's now look at the implementation of this operator=. It resides in the file src/Core/Assign.h. + +What we can see there is: +\code +template +template +inline Derived& MatrixBase + ::operator=(const MatrixBase& other) +{ + return internal::assign_selector::run(derived(), other.derived()); +} +\endcode + +OK so our next task is to understand internal::assign_selector :) + +Here is its declaration (all that is still in the same file src/Core/Assign.h) +\code +template +struct internal::assign_selector; +\endcode + +So internal::assign_selector takes 4 template parameters, but the 2 last ones are automatically determined by the 2 first ones. + +EvalBeforeAssigning is here to enforce the EvalBeforeAssigningBit. As explained here, certain expressions have this flag which makes them automatically evaluate into temporaries before assigning them to another expression. This is the case of the Product expression, in order to avoid strange aliasing effects when doing "m = m * m;" However, of course here our CwiseBinaryOp expression doesn't have the EvalBeforeAssigningBit: we said since the beginning that we didn't want a temporary to be introduced here. So if you go to src/Core/CwiseBinaryOp.h, you'll see that the Flags in internal::traits\ don't include the EvalBeforeAssigningBit. The Flags member of CwiseBinaryOp is then imported from the internal::traits by the EIGEN_GENERIC_PUBLIC_INTERFACE macro. Anyway, here the template parameter EvalBeforeAssigning has the value \c false. + +NeedToTranspose is here for the case where the user wants to copy a row-vector into a column-vector. 
We allow this as a special exception to the general rule that in assignments we require the dimensions to match. Anyway, here both the left-hand and right-hand sides are column vectors, in the sense that ColsAtCompileTime is equal to 1. So NeedToTranspose is \c false too. + +So, here we are in the partial specialization: +\code +internal::assign_selector<Derived, OtherDerived, false, false> +\endcode + +Here's how it is defined: +\code +template<typename Derived, typename OtherDerived> +struct internal::assign_selector<Derived,OtherDerived,false,false> { + static Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } +}; +\endcode + +OK so now our next job is to understand how lazyAssign works :) + +\code +template<typename Derived> +template<typename OtherDerived> +inline Derived& MatrixBase<Derived> + ::lazyAssign(const MatrixBase<OtherDerived>& other) +{ + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) + eigen_assert(rows() == other.rows() && cols() == other.cols()); + internal::assign_impl<Derived, OtherDerived>::run(derived(),other.derived()); + return derived(); +} +\endcode + +What do we see here? Some assertions, and then the only interesting line is: +\code + internal::assign_impl<Derived, OtherDerived>::run(derived(),other.derived()); +\endcode + +OK so now we want to know what is inside internal::assign_impl. + +Here is its declaration: +\code +template<typename Derived1, typename Derived2, + int Vectorization = internal::assign_traits<Derived1, Derived2>::Vectorization, + int Unrolling = internal::assign_traits<Derived1, Derived2>::Unrolling> +struct internal::assign_impl; +\endcode +Again, internal::assign_impl takes 4 template parameters, but the 2 last ones are automatically determined by the 2 first ones. + +These two parameters \a Vectorization and \a Unrolling are determined by a helper class internal::assign_traits. Its job is to determine which vectorization strategy to use (that is \a Vectorization) and which unrolling strategy to use (that is \a Unrolling). + +We'll not enter into the details of how these strategies are chosen (this is in the implementation of internal::assign_traits at the top of the same file).
Let's just say that here \a Vectorization has the value \a LinearVectorization, and \a Unrolling has the value \a NoUnrolling (the latter is obvious since our vectors have dynamic size so there's no way to unroll the loop at compile-time). + +So the partial specialization of internal::assign_impl that we're looking at is: +\code +internal::assign_impl +\endcode + +Here is how it's defined: +\code +template +struct internal::assign_impl +{ + static void run(Derived1 &dst, const Derived2 &src) + { + const int size = dst.size(); + const int packetSize = internal::packet_traits::size; + const int alignedStart = internal::assign_traits::DstIsAligned ? 0 + : internal::first_aligned(&dst.coeffRef(0), size); + const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + for(int index = 0; index < alignedStart; index++) + dst.copyCoeff(index, src); + + for(int index = alignedStart; index < alignedEnd; index += packetSize) + { + dst.template copyPacket::SrcAlignment>(index, src); + } + + for(int index = alignedEnd; index < size; index++) + dst.copyCoeff(index, src); + } +}; +\endcode + +Here's how it works. \a LinearVectorization means that the left-hand and right-hand side expression can be accessed linearly i.e. you can refer to their coefficients by one integer \a index, as opposed to having to refer to its coefficients by two integers \a row, \a column. + +As we said at the beginning, vectorization works with blocks of 4 floats. Here, \a PacketSize is 4. + +There are two potential problems that we need to deal with: +\li first, vectorization works much better if the packets are 128-bit-aligned. This is especially important for write access. So when writing to the coefficients of \a dst, we want to group these coefficients by packets of 4 such that each of these packets is 128-bit-aligned. In general, this requires to skip a few coefficients at the beginning of \a dst. This is the purpose of \a alignedStart. 
We then copy these first few coefficients one by one, not by packets. However, in our case, the \a dst expression is a VectorXf and remember that in the construction of the vectors we allocated aligned arrays. Thanks to \a DstIsAligned, Eigen remembers that without having to do any runtime check, so \a alignedStart is zero and this part is avoided altogether. +\li second, the number of coefficients to copy is not in general a multiple of \a packetSize. Here, there are 50 coefficients to copy and \a packetSize is 4. So we'll have to copy the last 2 coefficients one by one, not by packets. Here, \a alignedEnd is 48. + +Now come the actual loops. + +First, the vectorized part: the 48 first coefficients out of 50 will be copied by packets of 4: +\code + for(int index = alignedStart; index < alignedEnd; index += packetSize) + { + dst.template copyPacket::SrcAlignment>(index, src); + } +\endcode + +What is copyPacket? It is defined in src/Core/Coeffs.h: +\code +template +template +inline void MatrixBase::copyPacket(int index, const MatrixBase& other) +{ + eigen_internal_assert(index >= 0 && index < size()); + derived().template writePacket(index, + other.derived().template packet(index)); +} +\endcode + +OK, what are writePacket() and packet() here? + +First, writePacket() here is a method on the left-hand side VectorXf. So we go to src/Core/Matrix.h to look at its definition: +\code +template +inline void writePacket(int index, const PacketScalar& x) +{ + internal::pstoret(m_storage.data() + index, x); +} +\endcode +Here, \a StoreMode is \a #Aligned, indicating that we are doing a 128-bit-aligned write access, \a PacketScalar is a type representing a "SSE packet of 4 floats" and internal::pstoret is a function writing such a packet in memory. 
Their definitions are architecture-specific; we find them in src/Core/arch/SSE/PacketMath.h: + +The line in src/Core/arch/SSE/PacketMath.h that determines the PacketScalar type (via a typedef in Matrix.h) is: +\code +template<> struct internal::packet_traits<float> { typedef __m128 type; enum {size=4}; }; +\endcode +Here, __m128 is a SSE-specific type. Notice that the enum \a size here is what was used to define \a packetSize above. + +And here is the implementation of internal::pstore, which internal::pstoret calls: +\code +template<> inline void internal::pstore(float* to, const __m128& from) { _mm_store_ps(to, from); } +\endcode +Here, _mm_store_ps is a SSE-specific intrinsic function, representing a single SSE instruction. The difference between internal::pstore and internal::pstoret is that internal::pstoret is a dispatcher handling both the aligned and unaligned cases; you find its definition in src/Core/GenericPacketMath.h: +\code +template<typename Scalar, typename Packet, int LoadMode> +inline void internal::pstoret(Scalar* to, const Packet& from) +{ + if(LoadMode == Aligned) + internal::pstore(to, from); + else + internal::pstoreu(to, from); +} +\endcode + +OK, that explains how writePacket() works. Now let's look into the packet() call. Remember that we are analyzing this line of code inside copyPacket(): +\code +derived().template writePacket<StoreMode>(index, + other.derived().template packet<LoadMode>(index)); +\endcode + +Here, \a other is our sum expression \a v + \a w. The .derived() is just casting from MatrixBase to the subclass which here is CwiseBinaryOp. So let's go to src/Core/CwiseBinaryOp.h: +\code +class CwiseBinaryOp +{ + // ... + template<int LoadMode> + inline PacketScalar packet(int index) const + { + return m_functor.packetOp(m_lhs.template packet<LoadMode>(index), m_rhs.template packet<LoadMode>(index)); + } +}; +\endcode +Here, \a m_lhs is the vector \a v, and \a m_rhs is the vector \a w. So the packet() function here is Matrix::packet(). The template parameter \a LoadMode is \a #Aligned. So we're looking at +\code +class Matrix +{ + // ...
+ template + inline PacketScalar packet(int index) const + { + return internal::ploadt(m_storage.data() + index); + } +}; +\endcode +We let you look up the definition of internal::ploadt in GenericPacketMath.h and the internal::pload in src/Core/arch/SSE/PacketMath.h. It is very similar to the above for internal::pstore. + +Let's go back to CwiseBinaryOp::packet(). Once the packets from the vectors \a v and \a w have been returned, what does this function do? It calls m_functor.packetOp() on them. What is m_functor? Here we must remember what particular template specialization of CwiseBinaryOp we're dealing with: +\code +CwiseBinaryOp, VectorXf, VectorXf> +\endcode +So m_functor is an object of the empty class internal::scalar_sum_op. As we mentioned above, don't worry about why we constructed an object of this empty class at all -- it's an implementation detail, the point is that some other functors need to store member data. + +Anyway, internal::scalar_sum_op is defined in src/Core/Functors.h: +\code +template struct internal::scalar_sum_op EIGEN_EMPTY_STRUCT { + inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } + template + inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return internal::padd(a,b); } +}; +\endcode +As you can see, all what packetOp() does is to call internal::padd on the two packets. Here is the definition of internal::padd from src/Core/arch/SSE/PacketMath.h: +\code +template<> inline __m128 internal::padd(const __m128& a, const __m128& b) { return _mm_add_ps(a,b); } +\endcode +Here, _mm_add_ps is a SSE-specific intrinsic function, representing a single SSE instruction. 
+ +To summarize, the loop +\code + for(int index = alignedStart; index < alignedEnd; index += packetSize) + { + dst.template copyPacket<Derived2, Aligned, internal::assign_traits<Derived1,Derived2>::SrcAlignment>(index, src); + } +\endcode +has been compiled to the following code: for the packet number i going from 0 to 11 ( = 48/4 - 1), read the i-th packet (of 4 floats) from the vector v and the i-th packet from the vector w using two _mm_load_ps SSE instructions, then add them together using a _mm_add_ps instruction, then store the result using a _mm_store_ps instruction. + +There remains the second loop handling the last few (here, the last 2) coefficients: +\code + for(int index = alignedEnd; index < size; index++) + dst.copyCoeff(index, src); +\endcode +However, it works just like the one we just explained, it is just simpler because there is no SSE vectorization involved here. copyPacket() becomes copyCoeff(), packet() becomes coeff(), writePacket() becomes coeffRef(). If you followed us this far, you can probably understand this part by yourself. + +We see that all the C++ abstraction of Eigen goes away during compilation and that we indeed are precisely controlling which assembly instructions we emit. Such is the beauty of C++! Since we have such precise control over the emitted assembly instructions, but such complex logic to choose the right instructions, we can say that Eigen really behaves like an optimizing compiler. If you prefer, you could say that Eigen behaves like a script for the compiler. In a sense, C++ template metaprogramming is scripting the compiler -- and it's been shown that this scripting language is Turing-complete. See Wikipedia. 
+ +*/ + +} diff --git a/thirdparty/eigen/doc/LeastSquares.dox b/thirdparty/eigen/doc/LeastSquares.dox new file mode 100644 index 000000000..e2191a22f --- /dev/null +++ b/thirdparty/eigen/doc/LeastSquares.dox @@ -0,0 +1,70 @@ +namespace Eigen { + +/** \eigenManualPage LeastSquares Solving linear least squares systems + +This page describes how to solve linear least squares systems using %Eigen. An overdetermined system +of equations, say \a Ax = \a b, has no solutions. In this case, it makes sense to search for the +vector \a x which is closest to being a solution, in the sense that the difference \a Ax - \a b is +as small as possible. This \a x is called the least square solution (if the Euclidean norm is used). + +The three methods discussed on this page are the SVD decomposition, the QR decomposition and normal +equations. Of these, the SVD decomposition is generally the most accurate but the slowest, normal +equations is the fastest but least accurate, and the QR decomposition is in between. + +\eigenAutoToc + + +\section LeastSquaresSVD Using the SVD decomposition + +The \link JacobiSVD::solve() solve() \endlink method in the JacobiSVD class can be directly used to +solve linear squares systems. It is not enough to compute only the singular values (the default for +this class); you also need the singular vectors but the thin SVD decomposition suffices for +computing least squares solutions: + + + + + + + +
Example:Output:
\include TutorialLinAlgSVDSolve.cpp \verbinclude TutorialLinAlgSVDSolve.out
+ +This is the example from the page \link TutorialLinearAlgebra Linear algebra and decompositions \endlink. + + +\section LeastSquaresQR Using the QR decomposition + +The solve() method in QR decomposition classes also computes the least squares solution. There are +three QR decomposition classes: HouseholderQR (no pivoting, so fast but unstable), +ColPivHouseholderQR (column pivoting, thus a bit slower but more accurate) and FullPivHouseholderQR +(full pivoting, so slowest and most stable). Here is an example with column pivoting: + + + + + + + +
Example:Output:
\include LeastSquaresQR.cpp \verbinclude LeastSquaresQR.out
+ + +\section LeastSquaresNormalEquations Using normal equations + +Finding the least squares solution of \a Ax = \a b is equivalent to solving the normal equation +\f$ A^T A x = A^T b \f$. This leads to the following code + + + + + + + +
Example:Output:
\include LeastSquaresNormalEquations.cpp \verbinclude LeastSquaresNormalEquations.out
+ +If the matrix \a A is ill-conditioned, then this is not a good method, because the condition number +of \f$ A^T A \f$ is the square of the condition number of \a A. This means that you +lose twice as many digits using the normal equations as you do with the other methods. + +*/ + +} \ No newline at end of file diff --git a/thirdparty/eigen/doc/Manual.dox b/thirdparty/eigen/doc/Manual.dox new file mode 100644 index 000000000..a08609ad7 --- /dev/null +++ b/thirdparty/eigen/doc/Manual.dox @@ -0,0 +1,188 @@ + +// This file structures pages and modules into a convenient hierarchical structure. + +namespace Eigen { + +/** \page UserManual_CustomizingEigen Extending/Customizing Eigen + %Eigen can be extended in several ways, for instance, by defining global methods, by inserting custom methods within main %Eigen's classes through the \ref TopicCustomizing_Plugins "plugin" mechanism, by adding support to \ref TopicCustomizing_CustomScalar "custom scalar types" etc. See below for the respective sub-topics. + - \subpage TopicCustomizing_Plugins + - \subpage TopicCustomizing_InheritingMatrix + - \subpage TopicCustomizing_CustomScalar + - \subpage TopicCustomizing_NullaryExpr + - \subpage TopicNewExpressionType + \sa \ref TopicPreprocessorDirectives +*/ + + +/** \page UserManual_Generalities General topics + - \subpage Eigen2ToEigen3 + - \subpage TopicFunctionTakingEigenTypes + - \subpage TopicPreprocessorDirectives + - \subpage TopicAssertions + - \subpage TopicMultiThreading + - \subpage TopicUsingBlasLapack + - \subpage TopicUsingIntelMKL + - \subpage TopicCUDA + - \subpage TopicPitfalls + - \subpage TopicTemplateKeyword + - \subpage UserManual_UnderstandingEigen +*/ + +/** \page UserManual_UnderstandingEigen Understanding Eigen + - \subpage TopicInsideEigenExample + - \subpage TopicClassHierarchy + - \subpage TopicLazyEvaluation +*/ + +/** \page UnclassifiedPages Unclassified pages + - \subpage TopicResizing + - \subpage TopicVectorization + - \subpage TopicEigenExpressionTemplates + 
- \subpage TopicScalarTypes + - \subpage GettingStarted + - \subpage TutorialSparse_example_details + - \subpage TopicWritingEfficientProductExpression + - \subpage Experimental +*/ + + +/** \defgroup Support_modules Support modules + * Category of modules which add support for external libraries. + */ + + +/** \defgroup DenseMatrixManipulation_chapter Dense matrix and array manipulation */ +/** \defgroup DenseMatrixManipulation_Alignement Alignment issues */ +/** \defgroup DenseMatrixManipulation_Reference Reference */ + +/** \addtogroup TutorialMatrixClass + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialMatrixArithmetic + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialArrayClass + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialBlockOperations + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialAdvancedInitialization + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialReductionsVisitorsBroadcasting + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialMapClass + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialReshapeSlicing + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TopicAliasing + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TopicStorageOrders + \ingroup DenseMatrixManipulation_chapter */ + +/** \addtogroup DenseMatrixManipulation_Alignement + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TopicUnalignedArrayAssert + \ingroup DenseMatrixManipulation_Alignement */ +/** \addtogroup TopicFixedSizeVectorizable + \ingroup DenseMatrixManipulation_Alignement */ +/** \addtogroup TopicStructHavingEigenMembers + \ingroup DenseMatrixManipulation_Alignement */ +/** \addtogroup TopicStlContainers + \ingroup DenseMatrixManipulation_Alignement */ +/** \addtogroup TopicPassingByValue + \ingroup DenseMatrixManipulation_Alignement */ +/** \addtogroup TopicWrongStackAlignment + \ingroup 
DenseMatrixManipulation_Alignement */ + +/** \addtogroup DenseMatrixManipulation_Reference + \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup Core_Module + \ingroup DenseMatrixManipulation_Reference */ +/** \addtogroup Jacobi_Module + \ingroup DenseMatrixManipulation_Reference */ +/** \addtogroup Householder_Module + \ingroup DenseMatrixManipulation_Reference */ + +/** \addtogroup CoeffwiseMathFunctions + \ingroup DenseMatrixManipulation_chapter */ + +/** \addtogroup QuickRefPage + \ingroup DenseMatrixManipulation_chapter */ + + +/** \defgroup DenseLinearSolvers_chapter Dense linear problems and decompositions */ +/** \defgroup DenseLinearSolvers_Reference Reference */ + +/** \addtogroup TutorialLinearAlgebra + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup TopicLinearAlgebraDecompositions + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup LeastSquares + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup InplaceDecomposition + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup DenseDecompositionBenchmark + \ingroup DenseLinearSolvers_chapter */ + +/** \addtogroup DenseLinearSolvers_Reference + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup Cholesky_Module + \ingroup DenseLinearSolvers_Reference */ +/** \addtogroup LU_Module + \ingroup DenseLinearSolvers_Reference */ +/** \addtogroup QR_Module + \ingroup DenseLinearSolvers_Reference */ +/** \addtogroup SVD_Module + \ingroup DenseLinearSolvers_Reference*/ +/** \addtogroup Eigenvalues_Module + \ingroup DenseLinearSolvers_Reference */ + + + + +/** \defgroup Sparse_chapter Sparse linear algebra */ +/** \defgroup Sparse_Reference Reference */ + +/** \addtogroup TutorialSparse + \ingroup Sparse_chapter */ +/** \addtogroup TopicSparseSystems + \ingroup Sparse_chapter */ +/** \addtogroup MatrixfreeSolverExample + \ingroup Sparse_chapter */ + +/** \addtogroup Sparse_Reference + \ingroup Sparse_chapter */ +/** \addtogroup SparseCore_Module + \ingroup Sparse_Reference */ +/** 
\addtogroup OrderingMethods_Module + \ingroup Sparse_Reference */ +/** \addtogroup SparseCholesky_Module + \ingroup Sparse_Reference */ +/** \addtogroup SparseLU_Module + \ingroup Sparse_Reference */ +/** \addtogroup SparseQR_Module + \ingroup Sparse_Reference */ +/** \addtogroup IterativeLinearSolvers_Module + \ingroup Sparse_Reference */ +/** \addtogroup Sparse_Module + \ingroup Sparse_Reference */ +/** \addtogroup Support_modules + \ingroup Sparse_Reference */ + +/** \addtogroup SparseQuickRefPage + \ingroup Sparse_chapter */ + + +/** \defgroup Geometry_chapter Geometry */ +/** \defgroup Geometry_Reference Reference */ + +/** \addtogroup TutorialGeometry + \ingroup Geometry_chapter */ + +/** \addtogroup Geometry_Reference + \ingroup Geometry_chapter */ +/** \addtogroup Geometry_Module + \ingroup Geometry_Reference */ +/** \addtogroup Splines_Module + \ingroup Geometry_Reference */ + +/** \internal \brief Namespace containing low-level routines from the %Eigen library. */ +namespace internal {} +} diff --git a/thirdparty/eigen/doc/MatrixfreeSolverExample.dox b/thirdparty/eigen/doc/MatrixfreeSolverExample.dox new file mode 100644 index 000000000..3efa292b5 --- /dev/null +++ b/thirdparty/eigen/doc/MatrixfreeSolverExample.dox @@ -0,0 +1,20 @@ + +namespace Eigen { + +/** + +\eigenManualPage MatrixfreeSolverExample Matrix-free solvers + +Iterative solvers such as ConjugateGradient and BiCGSTAB can be used in a matrix free context. To this end, user must provide a wrapper class inheriting EigenBase<> and implementing the following methods: + - \c Index \c rows() and \c Index \c cols(): returns number of rows and columns respectively + - \c operator* with your type and an %Eigen dense column vector (its actual implementation goes in a specialization of the internal::generic_product_impl class) + +\c Eigen::internal::traits<> must also be specialized for the wrapper type. 
+ +Here is a complete example wrapping an Eigen::SparseMatrix: +\include matrixfree_cg.cpp +Output: \verbinclude matrixfree_cg.out + +*/ + +} \ No newline at end of file diff --git a/thirdparty/eigen/doc/NewExpressionType.dox b/thirdparty/eigen/doc/NewExpressionType.dox new file mode 100644 index 000000000..c2f243312 --- /dev/null +++ b/thirdparty/eigen/doc/NewExpressionType.dox @@ -0,0 +1,143 @@ +namespace Eigen { + +/** \page TopicNewExpressionType Adding a new expression type + + +\warning +Disclaimer: this page is tailored to very advanced users who are not afraid of dealing with some of %Eigen's internal aspects. +In most cases, a custom expression can be avoided by either using custom \ref MatrixBase::unaryExpr "unary" or \ref MatrixBase::binaryExpr "binary" functors, +while extremely complex matrix manipulations can be achieved by nullary functors as described in the \ref TopicCustomizing_NullaryExpr "previous page". + +This page describes with the help of an example how to implement a new +light-weight expression type in %Eigen. This consists of three parts: +the expression type itself, a traits class containing compile-time +information about the expression, and the evaluator class which is +used to evaluate the expression to a matrix. + +\b TO \b DO: Write a page explaining the design, with details on +vectorization etc., and refer to that page here. + + +\eigenAutoToc + +\section TopicSetting The setting + +A circulant matrix is a matrix where each column is the same as the +column to the left, except that it is cyclically shifted downwards. +For example, here is a 4-by-4 circulant matrix: +\f[ \begin{bmatrix} + 1 & 8 & 4 & 2 \\ + 2 & 1 & 8 & 4 \\ + 4 & 2 & 1 & 8 \\ + 8 & 4 & 2 & 1 +\end{bmatrix} \f] +A circulant matrix is uniquely determined by its first column. We wish +to write a function \c makeCirculant which, given the first column, +returns an expression representing the circulant matrix. 
+ +For simplicity, we restrict the \c makeCirculant function to dense +matrices. It may make sense to also allow arrays, or sparse matrices, +but we will not do so here. We also do not want to support +vectorization. + + +\section TopicPreamble Getting started + +We will present the file implementing the \c makeCirculant function +part by part. We start by including the appropriate header files and +forward declaring the expression class, which we will call +\c Circulant. The \c makeCirculant function will return an object of +this type. The class \c Circulant is in fact a class template; the +template argument \c ArgType refers to the type of the vector passed +to the \c makeCirculant function. + +\include make_circulant.cpp.preamble + + +\section TopicTraits The traits class + +For every expression class \c X, there should be a traits class +\c Traits<X> in the \c Eigen::internal namespace containing +information about \c X known at compile time. + +As explained in \ref TopicSetting, we designed the \c Circulant +expression class to refer to dense matrices. The entries of the +circulant matrix have the same type as the entries of the vector +passed to the \c makeCirculant function. The type used to index the +entries is also the same. Again for simplicity, we will only return +column-major matrices. Finally, the circulant matrix is a square +matrix (number of rows equals number of columns), and the number of +rows equals the number of rows of the column vector passed to the +\c makeCirculant function. If this is a dynamic-size vector, then the +size of the circulant matrix is not known at compile-time. + +This leads to the following code: + +\include make_circulant.cpp.traits + + +\section TopicExpression The expression class + +The next step is to define the expression class itself. In our case, +we want to inherit from \c MatrixBase in order to expose the interface +for dense matrices. 
In the constructor, we check that we are passed a +column vector (see \ref TopicAssertions) and we store the vector from +which we are going to build the circulant matrix in the member +variable \c m_arg. Finally, the expression class should compute the +size of the corresponding circulant matrix. As explained above, this +is a square matrix with as many columns as the vector used to +construct the matrix. + +\b TO \b DO: What about the \c Nested typedef? It seems to be +necessary; is this only temporary? + +\include make_circulant.cpp.expression + + +\section TopicEvaluator The evaluator + +The last big fragment implements the evaluator for the \c Circulant +expression. The evaluator computes the entries of the circulant +matrix; this is done in the \c .coeff() member function. The entries +are computed by finding the corresponding entry of the vector from +which the circulant matrix is constructed. Getting this entry may +actually be non-trivial when the circulant matrix is constructed from +a vector which is given by a complicated expression, so we use the +evaluator which corresponds to the vector. + +The \c CoeffReadCost constant records the cost of computing an entry +of the circulant matrix; we ignore the index computation and say that +this is the same as the cost of computing an entry of the vector from +which the circulant matrix is constructed. + +In the constructor, we save the evaluator for the column vector which +defined the circulant matrix. We also save the size of that vector; +remember that we can query an expression object to find the size but +not the evaluator. + +\include make_circulant.cpp.evaluator + + +\section TopicEntry The entry point + +After all this, the \c makeCirculant function is very simple. It +simply creates an expression object and returns it. 
+ +\include make_circulant.cpp.entry + + +\section TopicMain A simple main function for testing + +Finally, a short \c main function that shows how the \c makeCirculant +function can be called. + +\include make_circulant.cpp.main + +If all the fragments are combined, the following output is produced, +showing that the program works as expected: + +\include make_circulant.out + +*/ +} + diff --git a/thirdparty/eigen/doc/Overview.dox b/thirdparty/eigen/doc/Overview.dox new file mode 100644 index 000000000..dbb49bd21 --- /dev/null +++ b/thirdparty/eigen/doc/Overview.dox @@ -0,0 +1,30 @@ +namespace Eigen { + +/** \mainpage notitle + +This is the API documentation for Eigen3. You can download it as a tgz archive for offline reading. + +You're already an Eigen2 user? Here is a \link Eigen2ToEigen3 Eigen2 to Eigen3 guide \endlink to help you port your application. + +For a first contact with Eigen, the best place is to have a look at the \link GettingStarted getting started \endlink page that shows you how to write and compile your first program with Eigen. + +Then, the \b quick \b reference \b pages give you a quite complete description of the API in a very condensed format that is specially useful to recall the syntax of a particular feature, or to have a quick look at the API. They currently cover the two following feature sets, and more will come in the future: + - \link QuickRefPage [QuickRef] Dense matrix and array manipulations \endlink + - \link SparseQuickRefPage [QuickRef] Sparse linear algebra \endlink + +You're a MatLab user? There is also a short ASCII reference with Matlab translations. + +The \b main \b documentation is organized into \em chapters covering different domains of features. +They are themselves composed of \em user \em manual pages describing the different features in a comprehensive way, and \em reference pages that give you access to the API documentation through the related Eigen's \em modules and \em classes. 
+ +Under the \subpage UserManual_CustomizingEigen section, you will find discussions and examples on extending %Eigen's features and supporting custom scalar types. + +Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some Eigen's internal insights, and much more... + +Finally, do not miss the search engine, useful to quickly get to the documentation of a given class or function. + +Want more? Checkout the \em unsupported \em modules documentation. + +*/ + +} diff --git a/thirdparty/eigen/doc/PassingByValue.dox b/thirdparty/eigen/doc/PassingByValue.dox new file mode 100644 index 000000000..bf4d0ef4b --- /dev/null +++ b/thirdparty/eigen/doc/PassingByValue.dox @@ -0,0 +1,40 @@ +namespace Eigen { + +/** \eigenManualPage TopicPassingByValue Passing Eigen objects by value to functions + +Passing objects by value is almost always a very bad idea in C++, as this means useless copies, and one should pass them by reference instead. + +With Eigen, this is even more important: passing \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" by value is not only inefficient, it can be illegal or make your program crash! And the reason is that these Eigen objects have alignment modifiers that aren't respected when they are passed by value. 
+ +So for example, a function like this, where v is passed by value: + +\code +void my_function(Eigen::Vector2d v); +\endcode + +needs to be rewritten as follows, passing v by reference: + +\code +void my_function(const Eigen::Vector2d& v); +\endcode + +Likewise if you have a class having an Eigen object as member: + +\code +struct Foo +{ + Eigen::Vector2d v; +}; +void my_function(Foo v); +\endcode + +This function also needs to be rewritten like this: +\code +void my_function(const Foo& v); +\endcode + +Note that on the other hand, there is no problem with functions that return objects by value. + +*/ + +} diff --git a/thirdparty/eigen/doc/Pitfalls.dox b/thirdparty/eigen/doc/Pitfalls.dox new file mode 100644 index 000000000..cf42effef --- /dev/null +++ b/thirdparty/eigen/doc/Pitfalls.dox @@ -0,0 +1,38 @@ +namespace Eigen { + +/** \page TopicPitfalls Common pitfalls + +\section TopicPitfalls_template_keyword Compilation error with template methods + +See this \link TopicTemplateKeyword page \endlink. + +\section TopicPitfalls_auto_keyword C++11 and the auto keyword + +In short: do not use the auto keyword with Eigen's expressions, unless you are 100% sure about what you are doing. In particular, do not use the auto keyword as a replacement for a Matrix<> type. Here is an example: + +\code +MatrixXd A, B; +auto C = A*B; +for(...) { ... w = C * v; ...} +\endcode + +In this example, the type of C is not a MatrixXd but an abstract expression representing a matrix product and storing references to A and B. Therefore, the product of A*B will be carried out multiple times, once per iteration of the for loop. Moreover, if the coefficients of A or B change during the iteration, then C will evaluate to different values. 
+ +Here is another example leading to a segfault: +\code +auto C = ((A+B).eval()).transpose(); +// do something with C +\endcode +The problem is that eval() returns a temporary object (in this case a MatrixXd) which is then referenced by the Transpose<> expression. However, this temporary is deleted right after the first line, and then the C expression references a dead object. The same issue might occur when sub expressions are automatically evaluated by Eigen as in the following example: +\code +VectorXd u, v; +auto C = u + (A*v).normalized(); +// do something with C +\endcode +where the normalized() method has to evaluate the expensive product A*v to avoid evaluating it twice. On the other hand, the following example is perfectly fine: +\code +auto C = (u + (A*v).normalized()).eval(); +\endcode +In this case, C will be a regular VectorXd object. +*/ +} diff --git a/thirdparty/eigen/doc/PreprocessorDirectives.dox b/thirdparty/eigen/doc/PreprocessorDirectives.dox new file mode 100644 index 000000000..2f9c4c370 --- /dev/null +++ b/thirdparty/eigen/doc/PreprocessorDirectives.dox @@ -0,0 +1,166 @@ +namespace Eigen { + +/** \page TopicPreprocessorDirectives Preprocessor directives + +You can control some aspects of %Eigen by defining the preprocessor tokens using \c \#define. These macros +should be defined before any %Eigen headers are included. Often they are best set in the project options. + +This page lists the preprocessor tokens recognized by %Eigen. + +\eigenAutoToc + + +\section TopicPreprocessorDirectivesMajor Macros with major effects + +These macros have a major effect and typically break the API (Application Programming Interface) and/or the +ABI (Application Binary Interface). This can be rather dangerous: if parts of your program are compiled with +one option, and other parts (or libraries that you use) are compiled with another option, your program may +fail to link or exhibit subtle bugs. 
Nevertheless, these options can be useful for people who know what they +are doing. + + - \b EIGEN2_SUPPORT and \b EIGEN2_SUPPORT_STAGEnn_xxx are disabled starting from the 3.3 release. + Defining one of these will raise a compile-error. If you need to compile Eigen2 code, + check this site. + - \b EIGEN_DEFAULT_DENSE_INDEX_TYPE - the type for column and row indices in matrices, vectors and arrays + (DenseBase::Index). Set to \c std::ptrdiff_t by default. + - \b EIGEN_DEFAULT_IO_FORMAT - the IOFormat to use when printing a matrix if no %IOFormat is specified. + Defaults to the %IOFormat constructed by the default constructor IOFormat::IOFormat(). + - \b EIGEN_INITIALIZE_MATRICES_BY_ZERO - if defined, all entries of newly constructed matrices and arrays are + initialized to zero, as are new entries in matrices and arrays after resizing. Not defined by default. + \warning The unary (resp. binary) constructor of \c 1x1 (resp. \c 2x1 or \c 1x2) fixed size matrices is + always interpreted as an initialization constructor where the argument(s) are the coefficient values + and not the sizes. For instance, \code Vector2d v(2,1); \endcode will create a vector with coefficients [2,1], + and \b not a \c 2x1 vector initialized with zeros (i.e., [0,0]). If such cases might occur, then it is + recommended to use the default constructor with an explicit call to resize: + \code + Matrix v; + v.resize(size); + Matrix m; + m.resize(rows,cols); + \endcode + - \b EIGEN_INITIALIZE_MATRICES_BY_NAN - if defined, all entries of newly constructed matrices and arrays are + initialized to NaN, as are new entries in matrices and arrays after resizing. This option is especially + useful for debugging purposes, though a memory tool like valgrind is + preferable. Not defined by default. + \warning See the documentation of \c EIGEN_INITIALIZE_MATRICES_BY_ZERO for a discussion of the limitations + of these macros when applied to \c 1x1, \c 1x2, and \c 2x1 fixed-size matrices. 
+ - \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment + a = b have to be of the same size; otherwise, %Eigen automatically resizes \c a so that it is of + the correct size. Not defined by default. + + +\section TopicPreprocessorDirectivesCppVersion C++ standard features + +By default, %Eigen strives to automatically detect and enable language features at compile-time based on +the information provided by the compiler. + + - \b EIGEN_MAX_CPP_VER - disables usage of C++ features requiring a version greater than EIGEN_MAX_CPP_VER. + Possible values are: 03, 11, 14, 17, etc. If not defined (the default), %Eigen enables all features supported + by the compiler. + +Individual features can be explicitly enabled or disabled by defining the following tokens to 1 or 0 respectively. +For instance, one might limit the C++ version to C++03 by defining EIGEN_MAX_CPP_VER=03, but still enable C99 math +functions by defining EIGEN_HAS_C99_MATH=1. + + - \b EIGEN_HAS_C99_MATH - controls the usage of C99 math functions such as erf, erfc, lgamma, etc. + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CXX11_MATH - controls the implementation of some functions such as round, log1p, isinf, isnan, etc. + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_RVALUE_REFERENCES - defines whether rvalue references are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_STD_RESULT_OF - defines whether std::result_of is supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_VARIADIC_TEMPLATES - defines whether variadic templates are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CONSTEXPR - defines whether relaxed constant expressions are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<14. 
+ - \b EIGEN_HAS_CXX11_CONTAINERS - defines whether STL's containers follow C++11 specifications + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CXX11_NOEXCEPT - defines whether noexcept is supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + +\section TopicPreprocessorDirectivesAssertions Assertions + +The %Eigen library contains many assertions to guard against programming errors, both at compile time and at +run time. However, these assertions do cost time and can thus be turned off. + + - \b EIGEN_NO_DEBUG - disables %Eigen's assertions if defined. Not defined by default, unless the + \c NDEBUG macro is defined (this is a standard C++ macro which disables all asserts). + - \b EIGEN_NO_STATIC_ASSERT - if defined, compile-time static assertions are replaced by runtime assertions; + this saves compilation time. Not defined by default. + - \b eigen_assert - macro with one argument that is used inside %Eigen for assertions. By default, it is + basically defined to be \c assert, which aborts the program if the assertion is violated. Redefine this + macro if you want to do something else, like throwing an exception. + - \b EIGEN_MPL2_ONLY - disable non MPL2 compatible features, or in other words disable the features which + are still under the LGPL. + + +\section TopicPreprocessorDirectivesPerformance Alignment, vectorization and performance tweaking + + - \b EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system \c malloc already + returns aligned buffers. If not defined, then this information is automatically deduced from the compiler + and system preprocessor tokens. + - \b EIGEN_DONT_ALIGN - disables alignment completely. %Eigen will not try to align its objects and does not + expect that any objects passed to it are aligned. This will turn off vectorization. Not defined by default. + - \b EIGEN_DONT_ALIGN_STATICALLY - disables alignment of arrays on the stack. 
Not defined by default, unless + \c EIGEN_DONT_ALIGN is defined. + - \b EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP. + See \ref TopicMultiThreading for details. + - \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless + alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN. + - \b EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled). + If set to 0 (disabled), then expression for which the destination cannot be aligned are not vectorized (e.g., unaligned + small fixed size vectors or matrices) + - \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently + enables the SSE vectorization of sin() and cos(), and speedups sqrt() for single precision. Defined to 1 by default. + Define it to 0 to disable. + - \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable + unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not + correspond to the number of iterations or the number of instructions. The default is value 100. + - \b EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal + temporary buffers, dynamic memory allocation is employed as a fall back. For fixed-size matrices or arrays, exceeding + this threshold raises a compile time assertion. Use 0 to set no limit. Default is 128 KB. + + +\section TopicPreprocessorDirectivesPlugins Plugins + +It is possible to add new methods to many fundamental classes in %Eigen by writing a plugin. As explained in +the section \ref ExtendingMatrixBase, the plugin is specified by defining a \c EIGEN_xxx_PLUGIN macro. The +following macros are supported; none of them are defined by default. 
+ + - \b EIGEN_ARRAY_PLUGIN - filename of plugin for extending the Array class. + - \b EIGEN_ARRAYBASE_PLUGIN - filename of plugin for extending the ArrayBase class. + - \b EIGEN_CWISE_PLUGIN - filename of plugin for extending the Cwise class. + - \b EIGEN_DENSEBASE_PLUGIN - filename of plugin for extending the DenseBase class. + - \b EIGEN_DYNAMICSPARSEMATRIX_PLUGIN - filename of plugin for extending the DynamicSparseMatrix class. + - \b EIGEN_MATRIX_PLUGIN - filename of plugin for extending the Matrix class. + - \b EIGEN_MATRIXBASE_PLUGIN - filename of plugin for extending the MatrixBase class. + - \b EIGEN_PLAINOBJECTBASE_PLUGIN - filename of plugin for extending the PlainObjectBase class. + - \b EIGEN_MAPBASE_PLUGIN - filename of plugin for extending the MapBase class. + - \b EIGEN_QUATERNION_PLUGIN - filename of plugin for extending the Quaternion class. + - \b EIGEN_QUATERNIONBASE_PLUGIN - filename of plugin for extending the QuaternionBase class. + - \b EIGEN_SPARSEMATRIX_PLUGIN - filename of plugin for extending the SparseMatrix class. + - \b EIGEN_SPARSEMATRIXBASE_PLUGIN - filename of plugin for extending the SparseMatrixBase class. + - \b EIGEN_SPARSEVECTOR_PLUGIN - filename of plugin for extending the SparseVector class. + - \b EIGEN_TRANSFORM_PLUGIN - filename of plugin for extending the Transform class. + - \b EIGEN_FUNCTORS_PLUGIN - filename of plugin for adding new functors and specializations of functor_traits. + + +\section TopicPreprocessorDirectivesDevelopers Macros for Eigen developers + +These macros are mainly meant for people developing %Eigen and for testing purposes. Even though, they might be useful for power users and the curious for debugging and testing purpose, they \b should \b not \b be \b used by real-word code. + + - \b EIGEN_DEFAULT_TO_ROW_MAJOR - when defined, the default storage order for matrices becomes row-major + instead of column-major. Not defined by default. 
+ - \b EIGEN_INTERNAL_DEBUGGING - if defined, enables assertions in %Eigen's internal routines. This is useful + for debugging %Eigen itself. Not defined by default. + - \b EIGEN_NO_MALLOC - if defined, any request from inside the %Eigen to allocate memory from the heap + results in an assertion failure. This is useful to check that some routine does not allocate memory + dynamically. Not defined by default. + - \b EIGEN_RUNTIME_NO_MALLOC - if defined, a new switch is introduced which can be turned on and off by + calling set_is_malloc_allowed(bool). If malloc is not allowed and %Eigen tries to allocate memory + dynamically anyway, an assertion failure results. Not defined by default. + +*/ + +} diff --git a/thirdparty/eigen/doc/QuickReference.dox b/thirdparty/eigen/doc/QuickReference.dox new file mode 100644 index 000000000..e19c7e3a4 --- /dev/null +++ b/thirdparty/eigen/doc/QuickReference.dox @@ -0,0 +1,785 @@ +namespace Eigen { + +/** \eigenManualPage QuickRefPage Quick reference guide + +\eigenAutoToc + +
+ +top +\section QuickRef_Headers Modules and Header files + +The Eigen library is divided in a Core module and several additional modules. Each module has a corresponding header file which has to be included in order to use the module. The \c %Dense and \c Eigen header files are provided to conveniently gain access to several modules at once. + + + + + + + + + + + + + + +
ModuleHeader fileContents
\link Core_Module Core \endlink\code#include \endcodeMatrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation
\link Geometry_Module Geometry \endlink\code#include \endcodeTransform, Translation, Scaling, Rotation2D and 3D rotations (Quaternion, AngleAxis)
\link LU_Module LU \endlink\code#include \endcodeInverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)
\link Cholesky_Module Cholesky \endlink\code#include \endcodeLLT and LDLT Cholesky factorization with solver
\link Householder_Module Householder \endlink\code#include \endcodeHouseholder transformations; this module is used by several linear algebra modules
\link SVD_Module SVD \endlink\code#include \endcodeSVD decompositions with least-squares solver (JacobiSVD, BDCSVD)
\link QR_Module QR \endlink\code#include \endcodeQR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)
\link Eigenvalues_Module Eigenvalues \endlink\code#include \endcodeEigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)
\link Sparse_Module Sparse \endlink\code#include \endcode%Sparse matrix storage and related basic linear algebra (SparseMatrix, SparseVector) \n (see \ref SparseQuickRefPage for details on sparse modules)
\code#include \endcodeIncludes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files
\code#include \endcodeIncludes %Dense and %Sparse header files (the whole Eigen library)
+ +top +\section QuickRef_Types Array, matrix and vector types + + +\b Recall: Eigen provides two kinds of dense objects: mathematical matrices and vectors which are both represented by the template class Matrix, and general 1D and 2D arrays represented by the template class Array: +\code +typedef Matrix MyMatrixType; +typedef Array MyArrayType; +\endcode + +\li \c Scalar is the scalar type of the coefficients (e.g., \c float, \c double, \c bool, \c int, etc.). +\li \c RowsAtCompileTime and \c ColsAtCompileTime are the number of rows and columns of the matrix as known at compile-time or \c Dynamic. +\li \c Options can be \c ColMajor or \c RowMajor, default is \c ColMajor. (see class Matrix for more options) + +All combinations are allowed: you can have a matrix with a fixed number of rows and a dynamic number of columns, etc. The following are all valid: +\code +Matrix // Dynamic number of columns (heap allocation) +Matrix // Dynamic number of rows (heap allocation) +Matrix // Fully dynamic, row major (heap allocation) +Matrix // Fully fixed (usually allocated on stack) +\endcode + +In most cases, you can simply use one of the convenience typedefs for \ref matrixtypedefs "matrices" and \ref arraytypedefs "arrays". Some examples: + + + +
MatricesArrays
\code +Matrix <=> MatrixXf +Matrix <=> VectorXd +Matrix <=> RowVectorXi +Matrix <=> Matrix3f +Matrix <=> Vector4f +\endcode\code +Array <=> ArrayXXf +Array <=> ArrayXd +Array <=> RowArrayXi +Array <=> Array33f +Array <=> Array4f +\endcode
+ +Conversion between the matrix and array worlds: +\code +Array44f a1, a2; +Matrix4f m1, m2; +m1 = a1 * a2; // coeffwise product, implicit conversion from array to matrix. +a1 = m1 * m2; // matrix product, implicit conversion from matrix to array. +a2 = a1 + m1.array(); // mixing array and matrix is forbidden +m2 = a1.matrix() + m1; // and explicit conversion is required. +ArrayWrapper m1a(m1); // m1a is an alias for m1.array(), they share the same coefficients +MatrixWrapper a1m(a1); +\endcode + +In the rest of this document we will use the following symbols to emphasize the features which are specific to a given kind of object: +\li \matrixworld linear algebra matrix and vector only +\li \arrayworld array objects only + +\subsection QuickRef_Basics Basic matrix manipulation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
1D objects2D objectsNotes
Constructors\code +Vector4d v4; +Vector2f v1(x, y); +Array3i v2(x, y, z); +Vector4d v3(x, y, z, w); + +VectorXf v5; // empty object +ArrayXf v6(size); +\endcode\code +Matrix4f m1; + + + + +MatrixXf m5; // empty object +MatrixXf m6(nb_rows, nb_columns); +\endcode +By default, the coefficients \n are left uninitialized
Comma initializer\code +Vector3f v1; v1 << x, y, z; +ArrayXf v2(4); v2 << 1, 2, 3, 4; + +\endcode\code +Matrix3f m1; m1 << 1, 2, 3, + 4, 5, 6, + 7, 8, 9; +\endcode
Comma initializer (bis) +\include Tutorial_commainit_02.cpp + +output: +\verbinclude Tutorial_commainit_02.out +
Runtime info\code +vector.size(); + +vector.innerStride(); +vector.data(); +\endcode\code +matrix.rows(); matrix.cols(); +matrix.innerSize(); matrix.outerSize(); +matrix.innerStride(); matrix.outerStride(); +matrix.data(); +\endcodeInner/Outer* are storage order dependent
Compile-time info\code +ObjectType::Scalar ObjectType::RowsAtCompileTime +ObjectType::RealScalar ObjectType::ColsAtCompileTime +ObjectType::Index ObjectType::SizeAtCompileTime +\endcode
Resizing\code +vector.resize(size); + + +vector.resizeLike(other_vector); +vector.conservativeResize(size); +\endcode\code +matrix.resize(nb_rows, nb_cols); +matrix.resize(Eigen::NoChange, nb_cols); +matrix.resize(nb_rows, Eigen::NoChange); +matrix.resizeLike(other_matrix); +matrix.conservativeResize(nb_rows, nb_cols); +\endcodeno-op if the new sizes match,
otherwise data are lost

resizing with data preservation
Coeff access with \n range checking\code +vector(i) vector.x() +vector[i] vector.y() + vector.z() + vector.w() +\endcode\code +matrix(i,j) +\endcodeRange checking is disabled if \n NDEBUG or EIGEN_NO_DEBUG is defined
Coeff access without \n range checking\code +vector.coeff(i) +vector.coeffRef(i) +\endcode\code +matrix.coeff(i,j) +matrix.coeffRef(i,j) +\endcode
Assignment/copy\code +object = expression; +object_of_float = expression_of_double.cast(); +\endcodethe destination is automatically resized (if possible)
+ +\subsection QuickRef_PredefMat Predefined Matrices + + + + + + + + + + + + + + + + + + + +
Fixed-size matrix or vectorDynamic-size matrixDynamic-size vector
+\code +typedef {Matrix3f|Array33f} FixedXD; +FixedXD x; + +x = FixedXD::Zero(); +x = FixedXD::Ones(); +x = FixedXD::Constant(value); +x = FixedXD::Random(); +x = FixedXD::LinSpaced(size, low, high); + +x.setZero(); +x.setOnes(); +x.setConstant(value); +x.setRandom(); +x.setLinSpaced(size, low, high); +\endcode + +\code +typedef {MatrixXf|ArrayXXf} Dynamic2D; +Dynamic2D x; + +x = Dynamic2D::Zero(rows, cols); +x = Dynamic2D::Ones(rows, cols); +x = Dynamic2D::Constant(rows, cols, value); +x = Dynamic2D::Random(rows, cols); +N/A + +x.setZero(rows, cols); +x.setOnes(rows, cols); +x.setConstant(rows, cols, value); +x.setRandom(rows, cols); +N/A +\endcode + +\code +typedef {VectorXf|ArrayXf} Dynamic1D; +Dynamic1D x; + +x = Dynamic1D::Zero(size); +x = Dynamic1D::Ones(size); +x = Dynamic1D::Constant(size, value); +x = Dynamic1D::Random(size); +x = Dynamic1D::LinSpaced(size, low, high); + +x.setZero(size); +x.setOnes(size); +x.setConstant(size, value); +x.setRandom(size); +x.setLinSpaced(size, low, high); +\endcode +
Identity and \link MatrixBase::Unit basis vectors \endlink \matrixworld
+\code +x = FixedXD::Identity(); +x.setIdentity(); + +Vector3f::UnitX() // 1 0 0 +Vector3f::UnitY() // 0 1 0 +Vector3f::UnitZ() // 0 0 1 +\endcode + +\code +x = Dynamic2D::Identity(rows, cols); +x.setIdentity(rows, cols); + + + +N/A +\endcode + \code +N/A + + +VectorXf::Unit(size,i) +VectorXf::Unit(4,1) == Vector4f(0,1,0,0) + == Vector4f::UnitY() +\endcode +
+ + + +\subsection QuickRef_Map Mapping external arrays + + + + + + + + + + +
Contiguous \n memory\code +float data[] = {1,2,3,4}; +Map v1(data); // uses v1 as a Vector3f object +Map v2(data,3); // uses v2 as a ArrayXf object +Map m1(data); // uses m1 as a Array22f object +Map m2(data,2,2); // uses m2 as a MatrixXf object +\endcode
Typical usage \n of strides\code +float data[] = {1,2,3,4,5,6,7,8,9}; +Map > v1(data,3); // = [1,3,5] +Map > v2(data,3,InnerStride<>(3)); // = [1,4,7] +Map > m2(data,2,3); // both lines |1,4,7| +Map > m1(data,2,3,OuterStride<>(3)); // are equal to: |2,5,8| +\endcode
+ + +top +\section QuickRef_ArithmeticOperators Arithmetic Operators + + + + + + + + + + + + +
+add \n subtract\code +mat3 = mat1 + mat2; mat3 += mat1; +mat3 = mat1 - mat2; mat3 -= mat1;\endcode +
+scalar product\code +mat3 = mat1 * s1; mat3 *= s1; mat3 = s1 * mat1; +mat3 = mat1 / s1; mat3 /= s1;\endcode +
+matrix/vector \n products \matrixworld\code +col2 = mat1 * col1; +row2 = row1 * mat1; row1 *= mat1; +mat3 = mat1 * mat2; mat3 *= mat1; \endcode +
+transposition \n adjoint \matrixworld\code +mat1 = mat2.transpose(); mat1.transposeInPlace(); +mat1 = mat2.adjoint(); mat1.adjointInPlace(); +\endcode +
+\link MatrixBase::dot() dot \endlink product \n inner product \matrixworld\code +scalar = vec1.dot(vec2); +scalar = col1.adjoint() * col2; +scalar = (col1.adjoint() * col2).value();\endcode +
+outer product \matrixworld\code +mat = col1 * col2.transpose();\endcode +
+\link MatrixBase::norm() norm \endlink \n \link MatrixBase::normalized() normalization \endlink \matrixworld\code +scalar = vec1.norm(); scalar = vec1.squaredNorm() +vec2 = vec1.normalized(); vec1.normalize(); // inplace \endcode +
+\link MatrixBase::cross() cross product \endlink \matrixworld\code +#include +vec3 = vec1.cross(vec2);\endcode
+ +top +\section QuickRef_Coeffwise Coefficient-wise \& Array operators + +In addition to the aforementioned operators, Eigen supports numerous coefficient-wise operators and functions. +Most of them unambiguously make sense in array-world\arrayworld. The following operators are readily available for arrays, +or available through .array() for vectors and matrices: + + + + + +
Arithmetic operators\code +array1 * array2 array1 / array2 array1 *= array2 array1 /= array2 +array1 + scalar array1 - scalar array1 += scalar array1 -= scalar +\endcode
Comparisons\code +array1 < array2 array1 > array2 array1 < scalar array1 > scalar +array1 <= array2 array1 >= array2 array1 <= scalar array1 >= scalar +array1 == array2 array1 != array2 array1 == scalar array1 != scalar +array1.min(array2) array1.max(array2) array1.min(scalar) array1.max(scalar) +\endcode
Trigo, power, and \n misc functions \n and the STL-like variants\code +array1.abs2() +array1.abs() abs(array1) +array1.sqrt() sqrt(array1) +array1.log() log(array1) +array1.log10() log10(array1) +array1.exp() exp(array1) +array1.pow(array2) pow(array1,array2) +array1.pow(scalar) pow(array1,scalar) + pow(scalar,array2) +array1.square() +array1.cube() +array1.inverse() + +array1.sin() sin(array1) +array1.cos() cos(array1) +array1.tan() tan(array1) +array1.asin() asin(array1) +array1.acos() acos(array1) +array1.atan() atan(array1) +array1.sinh() sinh(array1) +array1.cosh() cosh(array1) +array1.tanh() tanh(array1) +array1.arg() arg(array1) + +array1.floor() floor(array1) +array1.ceil() ceil(array1) +array1.round() round(array1) + +array1.isFinite() isfinite(array1) +array1.isInf() isinf(array1) +array1.isNaN() isnan(array1) +\endcode +
+ + +The following coefficient-wise operators are available for all kind of expressions (matrices, vectors, and arrays), and for both real or complex scalar types: + + + + +
Eigen's APISTL-like APIs\arrayworld Comments
\code +mat1.real() +mat1.imag() +mat1.conjugate() +\endcode +\code +real(array1) +imag(array1) +conj(array1) +\endcode + +\code + // read-write, no-op for real expressions + // read-only for real, read-write for complexes + // no-op for real expressions +\endcode +
+ +Some coefficient-wise operators are readily available for matrices and vectors through the following cwise* methods: + + + +
Matrix API \matrixworldVia Array conversions
\code +mat1.cwiseMin(mat2) mat1.cwiseMin(scalar) +mat1.cwiseMax(mat2) mat1.cwiseMax(scalar) +mat1.cwiseAbs2() +mat1.cwiseAbs() +mat1.cwiseSqrt() +mat1.cwiseInverse() +mat1.cwiseProduct(mat2) +mat1.cwiseQuotient(mat2) +mat1.cwiseEqual(mat2) mat1.cwiseEqual(scalar) +mat1.cwiseNotEqual(mat2) +\endcode +\code +mat1.array().min(mat2.array()) mat1.array().min(scalar) +mat1.array().max(mat2.array()) mat1.array().max(scalar) +mat1.array().abs2() +mat1.array().abs() +mat1.array().sqrt() +mat1.array().inverse() +mat1.array() * mat2.array() +mat1.array() / mat2.array() +mat1.array() == mat2.array() mat1.array() == scalar +mat1.array() != mat2.array() +\endcode
+The main difference between the two APIs is that the one based on cwise* methods returns an expression in the matrix world, +while the second one (based on .array()) returns an array expression. +Recall that .array() has no cost, it only changes the available API and interpretation of the data. + +It is also very simple to apply any user-defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun (c++03), std::ref (c++11), or lambdas (c++11): +\code +mat1.unaryExpr(std::ptr_fun(foo)); +mat1.unaryExpr(std::ref(foo)); +mat1.unaryExpr([](double x) { return foo(x); }); +\endcode + + +top +\section QuickRef_Reductions Reductions + +Eigen provides several reduction methods such as: +\link DenseBase::minCoeff() minCoeff() \endlink, \link DenseBase::maxCoeff() maxCoeff() \endlink, +\link DenseBase::sum() sum() \endlink, \link DenseBase::prod() prod() \endlink, +\link MatrixBase::trace() trace() \endlink \matrixworld, +\link MatrixBase::norm() norm() \endlink \matrixworld, \link MatrixBase::squaredNorm() squaredNorm() \endlink \matrixworld, +\link DenseBase::all() all() \endlink, and \link DenseBase::any() any() \endlink. +All reduction operations can be done matrix-wise, +\link DenseBase::colwise() column-wise \endlink or +\link DenseBase::rowwise() row-wise \endlink. Usage example: + + + + +
\code + 5 3 1 +mat = 2 7 8 + 9 4 6 \endcode + \code mat.minCoeff(); \endcode\code 1 \endcode
\code mat.colwise().minCoeff(); \endcode\code 2 3 1 \endcode
\code mat.rowwise().minCoeff(); \endcode\code +1 +2 +4 +\endcode
+ +Special versions of \link DenseBase::minCoeff(IndexType*,IndexType*) const minCoeff \endlink and \link DenseBase::maxCoeff(IndexType*,IndexType*) const maxCoeff \endlink: +\code +int i, j; +s = vector.minCoeff(&i); // s == vector[i] +s = matrix.maxCoeff(&i, &j); // s == matrix(i,j) +\endcode +Typical use cases of all() and any(): +\code +if((array1 > 0).all()) ... // if all coefficients of array1 are greater than 0 ... +if((array1 < array2).any()) ... // if there exist a pair i,j such that array1(i,j) < array2(i,j) ... +\endcode + + +top\section QuickRef_Blocks Sub-matrices + +Read-write access to a \link DenseBase::col(Index) column \endlink +or a \link DenseBase::row(Index) row \endlink of a matrix (or array): +\code +mat1.row(i) = mat2.col(j); +mat1.col(j1).swap(mat1.col(j2)); +\endcode + +Read-write access to sub-vectors: + + + + + + + + + + + + + + + + + +
Default versionsOptimized versions when the size \n is known at compile time
\code vec1.head(n)\endcode\code vec1.head()\endcodethe first \c n coeffs
\code vec1.tail(n)\endcode\code vec1.tail()\endcodethe last \c n coeffs
\code vec1.segment(pos,n)\endcode\code vec1.segment(pos)\endcodethe \c n coeffs in the \n range [\c pos : \c pos + \c n - 1]
+ +Read-write access to sub-matrices:
\code mat1.block(i,j,rows,cols)\endcode + \link DenseBase::block(Index,Index,Index,Index) (more) \endlink\code mat1.block(i,j)\endcode + \link DenseBase::block(Index,Index) (more) \endlinkthe \c rows x \c cols sub-matrix \n starting from position (\c i,\c j)
\code + mat1.topLeftCorner(rows,cols) + mat1.topRightCorner(rows,cols) + mat1.bottomLeftCorner(rows,cols) + mat1.bottomRightCorner(rows,cols)\endcode + \code + mat1.topLeftCorner() + mat1.topRightCorner() + mat1.bottomLeftCorner() + mat1.bottomRightCorner()\endcode + the \c rows x \c cols sub-matrix \n taken in one of the four corners
\code + mat1.topRows(rows) + mat1.bottomRows(rows) + mat1.leftCols(cols) + mat1.rightCols(cols)\endcode + \code + mat1.topRows() + mat1.bottomRows() + mat1.leftCols() + mat1.rightCols()\endcode + specialized versions of block() \n when the block fits two corners
+ + + +top\section QuickRef_Misc Miscellaneous operations + +\subsection QuickRef_Reverse Reverse +Vectors, rows, and/or columns of a matrix can be reversed (see DenseBase::reverse(), DenseBase::reverseInPlace(), VectorwiseOp::reverse()). +\code +vec.reverse() mat.colwise().reverse() mat.rowwise().reverse() +vec.reverseInPlace() +\endcode + +\subsection QuickRef_Replicate Replicate +Vectors, matrices, rows, and/or columns can be replicated in any direction (see DenseBase::replicate(), VectorwiseOp::replicate()) +\code +vec.replicate(times) vec.replicate +mat.replicate(vertical_times, horizontal_times) mat.replicate() +mat.colwise().replicate(vertical_times, horizontal_times) mat.colwise().replicate() +mat.rowwise().replicate(vertical_times, horizontal_times) mat.rowwise().replicate() +\endcode + + +top\section QuickRef_DiagTriSymm Diagonal, Triangular, and Self-adjoint matrices +(matrix world \matrixworld) + +\subsection QuickRef_Diagonal Diagonal matrices + + + + + + + + + + + + + +
OperationCode
+view a vector \link MatrixBase::asDiagonal() as a diagonal matrix \endlink \n \code +mat1 = vec1.asDiagonal();\endcode +
+Declare a diagonal matrix\code +DiagonalMatrix diag1(size); +diag1.diagonal() = vector;\endcode +
Access the \link MatrixBase::diagonal() diagonal \endlink and \link MatrixBase::diagonal(Index) super/sub diagonals \endlink of a matrix as a vector (read/write)\code +vec1 = mat1.diagonal(); mat1.diagonal() = vec1; // main diagonal +vec1 = mat1.diagonal(+n); mat1.diagonal(+n) = vec1; // n-th super diagonal +vec1 = mat1.diagonal(-n); mat1.diagonal(-n) = vec1; // n-th sub diagonal +vec1 = mat1.diagonal<1>(); mat1.diagonal<1>() = vec1; // first super diagonal +vec1 = mat1.diagonal<-2>(); mat1.diagonal<-2>() = vec1; // second sub diagonal +\endcode
Optimized products and inverse\code +mat3 = scalar * diag1 * mat1; +mat3 += scalar * mat1 * vec1.asDiagonal(); +mat3 = vec1.asDiagonal().inverse() * mat1 +mat3 = mat1 * diag1.inverse() +\endcode
+ +\subsection QuickRef_TriangularView Triangular views + +TriangularView gives a view on a triangular part of a dense matrix and allows to perform optimized operations on it. The opposite triangular part is never referenced and can be used to store other information. + +\note The .triangularView() template member function requires the \c template keyword if it is used on an +object of a type that depends on a template parameter; see \ref TopicTemplateKeyword for details. + + + + + + + + +
OperationCode
+Reference to a triangular with optional \n +unit or null diagonal (read/write): +\code +m.triangularView() +\endcode \n +\c Xxx = ::Upper, ::Lower, ::StrictlyUpper, ::StrictlyLower, ::UnitUpper, ::UnitLower +
+Writing to a specific triangular part:\n (only the referenced triangular part is evaluated) +\code +m1.triangularView() = m2 + m3 \endcode +
+Conversion to a dense matrix setting the opposite triangular part to zero: +\code +m2 = m1.triangularView()\endcode +
+Products: +\code +m3 += s1 * m1.adjoint().triangularView() * m2 +m3 -= s1 * m2.conjugate() * m1.adjoint().triangularView() \endcode +
+Solving linear equations:\n +\f$ M_2 := L_1^{-1} M_2 \f$ \n +\f$ M_3 := {L_1^*}^{-1} M_3 \f$ \n +\f$ M_4 := M_4 U_1^{-1} \f$ +\n \code +L1.triangularView().solveInPlace(M2) +L1.triangularView().adjoint().solveInPlace(M3) +U1.triangularView().solveInPlace(M4)\endcode +
+ +\subsection QuickRef_SelfadjointMatrix Symmetric/selfadjoint views + +Just as for triangular matrix, you can reference any triangular part of a square matrix to see it as a selfadjoint +matrix and perform special and optimized operations. Again the opposite triangular part is never referenced and can be +used to store other information. + +\note The .selfadjointView() template member function requires the \c template keyword if it is used on an +object of a type that depends on a template parameter; see \ref TopicTemplateKeyword for details. + + + + + + + + +
OperationCode
+Conversion to a dense matrix: +\code +m2 = m.selfadjointView();\endcode +
+Product with another general matrix or vector: +\code +m3 = s1 * m1.conjugate().selfadjointView() * m3; +m3 -= s1 * m3.adjoint() * m1.selfadjointView();\endcode +
+Rank 1 and rank K update: \n +\f$ upper(M_1) \mathrel{{+}{=}} s_1 M_2 M_2^* \f$ \n +\f$ lower(M_1) \mathbin{{-}{=}} M_2^* M_2 \f$ +\n \code +M1.selfadjointView().rankUpdate(M2,s1); +M1.selfadjointView().rankUpdate(M2.adjoint(),-1); \endcode +
+Rank 2 update: (\f$ M \mathrel{{+}{=}} s u v^* + s v u^* \f$) +\code +M.selfadjointView().rankUpdate(u,v,s); +\endcode +
+Solving linear equations:\n(\f$ M_2 := M_1^{-1} M_2 \f$) +\code +// via a standard Cholesky factorization +m2 = m1.selfadjointView().llt().solve(m2); +// via a Cholesky factorization with pivoting +m2 = m1.selfadjointView().ldlt().solve(m2); +\endcode +
+ +*/ + +/* + + + + + + + + + + + + + +
+\link MatrixBase::asDiagonal() make a diagonal matrix \endlink \n from a vector \code +mat1 = vec1.asDiagonal();\endcode +
+Declare a diagonal matrix\code +DiagonalMatrix diag1(size); +diag1.diagonal() = vector;\endcode +
Access \link MatrixBase::diagonal() the diagonal and super/sub diagonals of a matrix \endlink as a vector (read/write)\code +vec1 = mat1.diagonal(); mat1.diagonal() = vec1; // main diagonal +vec1 = mat1.diagonal(+n); mat1.diagonal(+n) = vec1; // n-th super diagonal +vec1 = mat1.diagonal(-n); mat1.diagonal(-n) = vec1; // n-th sub diagonal +vec1 = mat1.diagonal<1>(); mat1.diagonal<1>() = vec1; // first super diagonal +vec1 = mat1.diagonal<-2>(); mat1.diagonal<-2>() = vec1; // second sub diagonal +\endcode
View on a triangular part of a matrix (read/write)\code +mat2 = mat1.triangularView(); +// Xxx = Upper, Lower, StrictlyUpper, StrictlyLower, UnitUpper, UnitLower +mat1.triangularView() = mat2 + mat3; // only the upper part is evaluated and referenced +\endcode
View a triangular part as a symmetric/self-adjoint matrix (read/write)\code +mat2 = mat1.selfadjointView(); // Xxx = Upper or Lower +mat1.selfadjointView() = mat2 + mat2.adjoint(); // evaluated and write to the upper triangular part only +\endcode
+ +Optimized products: +\code +mat3 += scalar * vec1.asDiagonal() * mat1 +mat3 += scalar * mat1 * vec1.asDiagonal() +mat3.noalias() += scalar * mat1.triangularView() * mat2 +mat3.noalias() += scalar * mat2 * mat1.triangularView() +mat3.noalias() += scalar * mat1.selfadjointView() * mat2 +mat3.noalias() += scalar * mat2 * mat1.selfadjointView() +mat1.selfadjointView().rankUpdate(mat2); +mat1.selfadjointView().rankUpdate(mat2.adjoint(), scalar); +\endcode + +Inverse products: (all are optimized) +\code +mat3 = vec1.asDiagonal().inverse() * mat1 +mat3 = mat1 * diag1.inverse() +mat1.triangularView().solveInPlace(mat2) +mat1.triangularView().solveInPlace(mat2) +mat2 = mat1.selfadjointView().llt().solve(mat2) +\endcode + +*/ +} diff --git a/thirdparty/eigen/doc/QuickStartGuide.dox b/thirdparty/eigen/doc/QuickStartGuide.dox new file mode 100644 index 000000000..ea32c3b3d --- /dev/null +++ b/thirdparty/eigen/doc/QuickStartGuide.dox @@ -0,0 +1,100 @@ +namespace Eigen { + +/** \page GettingStarted Getting started + +\eigenAutoToc + +This is a very short guide on how to get started with Eigen. It has a dual purpose. It serves as a minimal introduction to the Eigen library for people who want to start coding as soon as possible. You can also read this page as the first part of the Tutorial, which explains the library in more detail; in this case you will continue with \ref TutorialMatrixClass. + +\section GettingStartedInstallation How to "install" Eigen? + +In order to use Eigen, you just need to download and extract Eigen's source code (see the wiki for download instructions). In fact, the header files in the \c Eigen subdirectory are the only files required to compile programs using Eigen. The header files are the same for all platforms. It is not necessary to use CMake or install anything. + + +\section GettingStartedFirstProgram A simple first program + +Here is a rather simple program to get you started. 
+ +\include QuickStart_example.cpp + +We will explain the program after telling you how to compile it. + + +\section GettingStartedCompiling Compiling and running your first program + +There is no library to link to. The only thing that you need to keep in mind when compiling the above program is that the compiler must be able to find the Eigen header files. The directory in which you placed Eigen's source code must be in the include path. With GCC you use the -I option to achieve this, so you can compile the program with a command like this: + +\code g++ -I /path/to/eigen/ my_program.cpp -o my_program \endcode + +On Linux or Mac OS X, another option is to symlink or copy the Eigen folder into /usr/local/include/. This way, you can compile the program with: + +\code g++ my_program.cpp -o my_program \endcode + +When you run the program, it produces the following output: + +\include QuickStart_example.out + + +\section GettingStartedExplanation Explanation of the first program + +The Eigen header files define many types, but for simple applications it may be enough to use only the \c MatrixXd type. This represents a matrix of arbitrary size (hence the \c X in \c MatrixXd), in which every entry is a \c double (hence the \c d in \c MatrixXd). See the \ref QuickRef_Types "quick reference guide" for an overview of the different types you can use to represent a matrix. + +The \c Eigen/Dense header file defines all member functions for the MatrixXd type and related types (see also the \ref QuickRef_Headers "table of header files"). All classes and functions defined in this header file (and other Eigen header files) are in the \c Eigen namespace. + +The first line of the \c main function declares a variable of type \c MatrixXd and specifies that it is a matrix with 2 rows and 2 columns (the entries are not initialized). The statement m(0,0) = 3 sets the entry in the top-left corner to 3. You need to use round parentheses to refer to entries in the matrix. 
As usual in computer science, the index of the first entry is 0, as opposed to the convention in mathematics that the first index is 1. + +The following three statements set the other three entries. The final line outputs the matrix \c m to the standard output stream. + + +\section GettingStartedExample2 Example 2: Matrices and vectors + +Here is another example, which combines matrices with vectors. Concentrate on the left-hand program for now; we will talk about the right-hand program later. + + + + +
Size set at run time:Size set at compile time:
+\include QuickStart_example2_dynamic.cpp + +\include QuickStart_example2_fixed.cpp +
+ +The output is as follows: + +\include QuickStart_example2_dynamic.out + + +\section GettingStartedExplanation2 Explanation of the second example + +The second example starts by declaring a 3-by-3 matrix \c m which is initialized using the \link DenseBase::Random(Index,Index) Random() \endlink method with random values between -1 and 1. The next line applies a linear mapping such that the values are between 10 and 110. The function call \link DenseBase::Constant(Index,Index,const Scalar&) MatrixXd::Constant\endlink(3,3,1.2) returns a 3-by-3 matrix expression having all coefficients equal to 1.2. The rest is standard arithmetic. + +The next line of the \c main function introduces a new type: \c VectorXd. This represents a (column) vector of arbitrary size. Here, the vector \c v is created to contain \c 3 coefficients which are left uninitialized. The next-to-last line uses the so-called comma-initializer, explained in \ref TutorialAdvancedInitialization, to set all coefficients of the vector \c v to be as follows: + +\f[ +v = +\begin{bmatrix} + 1 \\ + 2 \\ + 3 +\end{bmatrix}. +\f] + +The final line of the program multiplies the matrix \c m with the vector \c v and outputs the result. + +Now look back at the second example program. We presented two versions of it. In the version in the left column, the matrix is of type \c MatrixXd which represents matrices of arbitrary size. The version in the right column is similar, except that the matrix is of type \c Matrix3d, which represents matrices of a fixed size (here 3-by-3). Because the type already encodes the size of the matrix, it is not necessary to specify the size in the constructor; compare MatrixXd m(3,3) with Matrix3d m. Similarly, we have \c VectorXd on the left (arbitrary size) versus \c Vector3d on the right (fixed size). Note that here the coefficients of vector \c v are directly set in the constructor, though the same syntax of the left example could be used too.
+ +The use of fixed-size matrices and vectors has two advantages. The compiler emits better (faster) code because it knows the size of the matrices and vectors. Specifying the size in the type also allows for more rigorous checking at compile-time. For instance, the compiler will complain if you try to multiply a \c Matrix4d (a 4-by-4 matrix) with a \c Vector3d (a vector of size 3). However, the use of many types increases compilation time and the size of the executable. The size of the matrix may also not be known at compile-time. A rule of thumb is to use fixed-size matrices for size 4-by-4 and smaller. + + +\section GettingStartedConclusion Where to go from here? + +It's worth taking the time to read the \ref TutorialMatrixClass "long tutorial". + +However if you think you don't need it, you can directly use the classes documentation and our \ref QuickRefPage. + +\li \b Next: \ref TutorialMatrixClass + +*/ + +} + diff --git a/thirdparty/eigen/doc/SparseLinearSystems.dox b/thirdparty/eigen/doc/SparseLinearSystems.dox new file mode 100644 index 000000000..fc33b93e7 --- /dev/null +++ b/thirdparty/eigen/doc/SparseLinearSystems.dox @@ -0,0 +1,229 @@ +namespace Eigen { +/** \eigenManualPage TopicSparseSystems Solving Sparse Linear Systems +In Eigen, there are several methods available to solve linear systems when the coefficient matrix is sparse. Because of the special representation of this class of matrices, special care should be taken in order to get a good performance. See \ref TutorialSparse for a detailed introduction about sparse matrices in Eigen. This page lists the sparse solvers available in Eigen. The main steps that are common to all these linear solvers are introduced as well. Depending on the properties of the matrix, the desired accuracy, the end-user is able to tune those steps in order to improve the performance of its code. 
Note that it is not required to know deeply what's hiding behind these steps: the last section presents a benchmark routine that can be easily used to get an insight on the performance of all the available solvers. + +\eigenAutoToc + +\section TutorialSparseSolverList List of sparse solvers + +%Eigen currently provides a wide set of built-in solvers, as well as wrappers to external solver libraries. +They are summarized in the following tables: + +\subsection TutorialSparseSolverList_Direct Built-in direct solvers + + + + + + + + + + + + + + + + + + + + + + +
ClassSolver kindMatrix kindFeatures related to performanceLicense

Notes

SimplicialLLT \n \#includeDirect LLt factorizationSPDFill-in reducingLGPLSimplicialLDLT is often preferable
SimplicialLDLT \n \#includeDirect LDLt factorizationSPDFill-in reducingLGPLRecommended for very sparse and not too large problems (e.g., 2D Poisson eq.)
SparseLU \n \#include LU factorization Square Fill-in reducing, Leverage fast dense algebraMPL2optimized for small and large problems with irregular patterns
SparseQR \n \#include QR factorizationAny, rectangular Fill-in reducingMPL2recommended for least-square problems, has a basic rank-revealing feature
+ +\subsection TutorialSparseSolverList_Iterative Built-in iterative solvers + + + + + + + + + + + + + + + + + + + +
ClassSolver kindMatrix kindSupported preconditioners, [default]License

Notes

ConjugateGradient \n \#include Classic iterative CGSPDIdentityPreconditioner, [DiagonalPreconditioner], IncompleteCholeskyMPL2Recommended for large symmetric problems (e.g., 3D Poisson eq.)
LeastSquaresConjugateGradient \n \#includeCG for rectangular least-square problemRectangularIdentityPreconditioner, [LeastSquareDiagonalPreconditioner]MPL2Solve for min |A'Ax-b|^2 without forming A'A
BiCGSTAB \n \#includeIterative stabilized bi-conjugate gradientSquareIdentityPreconditioner, [DiagonalPreconditioner], IncompleteLUTMPL2To speedup the convergence, try it with the \ref IncompleteLUT preconditioner.
+ +\subsection TutorialSparseSolverList_Wrapper Wrappers to external solvers + + + + + + + + + + + + + + + + + + + + + + +
ClassModuleSolver kindMatrix kindFeatures related to performanceDependencies,License

Notes

PastixLLT \n PastixLDLT \n PastixLU\link PaStiXSupport_Module PaStiXSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, MultithreadingRequires the PaStiX package, \b CeCILL-C optimized for tough problems and symmetric patterns
CholmodSupernodalLLT\link CholmodSupport_Module CholmodSupport \endlinkDirect LLt factorizationSPDFill-in reducing, Leverage fast dense algebraRequires the SuiteSparse package, \b GPL
UmfPackLU\link UmfPackSupport_Module UmfPackSupport \endlinkDirect LU factorizationSquareFill-in reducing, Leverage fast dense algebraRequires the SuiteSparse package, \b GPL
SuperLU\link SuperLUSupport_Module SuperLUSupport \endlinkDirect LU factorizationSquareFill-in reducing, Leverage fast dense algebraRequires the SuperLU library, (BSD-like)
SPQR\link SPQRSupport_Module SPQRSupport \endlink QR factorization Any, rectangularfill-in reducing, multithreaded, fast dense algebra requires the SuiteSparse package, \b GPL recommended for linear least-squares problems, has a rank-revealing feature
PardisoLLT \n PardisoLDLT \n PardisoLU\link PardisoSupport_Module PardisoSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, MultithreadingRequires the Intel MKL package, \b Proprietary optimized for tough problems patterns, see also \link TopicUsingIntelMKL using MKL with Eigen \endlink
+ +Here \c SPD means symmetric positive definite. + +\section TutorialSparseSolverConcept Sparse solver concept + +All these solvers follow the same general concept. +Here is a typical and general example: +\code +#include +// ... +SparseMatrix A; +// fill A +VectorXd b, x; +// fill b +// solve Ax = b +SolverClassName > solver; +solver.compute(A); +if(solver.info()!=Success) { + // decomposition failed + return; +} +x = solver.solve(b); +if(solver.info()!=Success) { + // solving failed + return; +} +// solve for another right hand side: +x1 = solver.solve(b1); +\endcode + +For \c SPD solvers, a second optional template argument allows specifying which triangular part has to be used, e.g.: + +\code +#include + +ConjugateGradient, Eigen::Upper> solver; +x = solver.compute(A).solve(b); +\endcode +In the above example, only the upper triangular part of the input matrix A is considered for solving. The opposite triangle might either be empty or contain arbitrary values. + +In the case where multiple problems with the same sparsity pattern have to be solved, the "compute" step can be decomposed as follows: +\code +SolverClassName > solver; +solver.analyzePattern(A); // for this step the numerical values of A are not used +solver.factorize(A); +x1 = solver.solve(b1); +x2 = solver.solve(b2); +... +A = ...; // modify the values of the nonzeros of A, the nonzeros pattern must stay unchanged +solver.factorize(A); +x1 = solver.solve(b1); +x2 = solver.solve(b2); +... +\endcode +The compute() method is equivalent to calling both analyzePattern() and factorize(). + +Each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on. +More details are available in the documentation of the respective classes. + +Finally, most of the iterative solvers can also be used in a \b matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. 
+ +\section TheSparseCompute The Compute Step +In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). + +The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step should not be used with other matrices. + +Eigen provides a limited set of methods to reorder the matrix in this step, either built-in (COLAMD, AMD) or external (METIS). These methods are set in template parameter list of the solver : +\code +DirectSolverClassName, OrderingMethod > solver; +\endcode + +See the \link OrderingMethods_Module OrderingMethods module \endlink for the list of available methods and the associated options. + +In factorize(), the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. + +For iterative solvers, the compute step is used to eventually setup a preconditioner. For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. Remember that, basically, the goal of the preconditioner is to speedup the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. 
For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is selected by simply adding it as a template parameter to the iterative solver object. +\code +IterativeSolverClassName, PreconditionerName > solver; +\endcode +The member function preconditioner() returns a read-write reference to the preconditioner + to directly interact with it. See the \link IterativeLinearSolvers_Module Iterative solvers module \endlink and the documentation of each class for the list of available methods. + +\section TheSparseSolve The Solve step +The solve() function computes the solution of the linear systems with one or many right hand sides. +\code +X = solver.solve(B); +\endcode +Here, B can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance when not all the right hand sides are available at once. +\code +x1 = solver.solve(b1); +// Get the second right hand side b2 +x2 = solver.solve(b2); +// ... +\endcode +For direct methods, the solutions are computed to machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using \b setTolerance(). For all the available functions, please refer to the documentation of the \link IterativeLinearSolvers_Module Iterative solvers module \endlink. + +\section BenchmarkRoutine +Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to bench/spbench and compile the routine by typing \b make \e spbenchsolver. Run it with the --help option to get the list of all available options. 
Basically, the matrices to test should be in MatrixMarket Coordinate format, and the routine returns the statistics from all available solvers in Eigen. + +To export your matrices and right-hand-side vectors in the matrix-market format, you can use the unsupported SparseExtra module: +\code +#include <unsupported/Eigen/SparseExtra> +... +Eigen::saveMarket(A, "filename.mtx"); +Eigen::saveMarket(A, "filename_SPD.mtx", Eigen::Symmetric); // if A is symmetric-positive-definite +Eigen::saveMarketVector(B, "filename_b.mtx"); +\endcode + +The following table gives an example of XML statistics from several Eigen built-in and external solvers. + + +
Matrix N NNZ UMFPACK SUPERLU PASTIX LU BiCGSTAB BiCGSTAB+ILUT GMRES+ILUT LDLT CHOLMOD LDLT PASTIX LDLT LLT CHOLMOD SP LLT CHOLMOD LLT PASTIX LLT CG
vector_graphics 12855 72069 Compute Time 0.02545490.02156770.07018270.0001533880.01401070.01537090.01016010.009305020.0649689 +
Solve Time 0.003378350.0009518260.004843730.03748860.00464450.008477540.0005418130.0002936960.00485376 +
Total Time 0.02883330.02251950.07502650.0376420.01865520.02384840.01070190.009598710.0698227 +
Error(Iter) 1.299e-16 2.04207e-16 4.83393e-15 3.94856e-11 (80) 1.03861e-12 (3) 5.81088e-14 (6) 1.97578e-16 1.83927e-16 4.24115e-15 +
poisson_SPD 19788 308232 Compute Time 0.4250261.823780.6173670.0004789211.340011.334710.7964190.8575730.4730070.8148260.1847190.8615550.4705590.000458188 +
Solve Time 0.02800530.01944020.02687470.2494370.05484440.09269910.008502040.00531710.02589320.008746030.005781550.005303610.02489420.239093 +
Total Time 0.4530311.843220.6442410.2499161.394861.427410.8049210.8628910.49890.8235720.1905010.8668590.4954530.239551 +
Error(Iter) 4.67146e-16 1.068e-15 1.3397e-15 6.29233e-11 (201) 3.68527e-11 (6) 3.3168e-15 (16) 1.86376e-15 1.31518e-16 1.42593e-15 3.45361e-15 3.14575e-16 2.21723e-15 7.21058e-16 9.06435e-12 (261) +
sherman2 1080 23094 Compute Time 0.006317540.0150520.0247514 -0.02144250.0217988 +
Solve Time 0.0004784240.0003379980.0010291 -0.002431520.00246152 +
Total Time 0.006795970.015390.0257805 -0.0238740.0242603 +
Error(Iter) 1.83099e-15 8.19351e-15 2.625e-14 1.3678e+69 (1080) 4.1911e-12 (7) 5.0299e-13 (12) +
bcsstk01_SPD 48 400 Compute Time 0.0001690790.000107890.0005725381.425e-069.1612e-058.3985e-055.6489e-057.0913e-050.0004682515.7389e-058.0212e-055.8394e-050.0004630171.333e-06 +
Solve Time 1.2288e-051.1124e-050.0002863878.5896e-051.6381e-051.6984e-053.095e-064.115e-060.0003254383.504e-067.369e-063.454e-060.0002940956.0516e-05 +
Total Time 0.0001813670.0001190140.0008589258.7321e-050.0001079930.0001009695.9584e-057.5028e-050.0007936896.0893e-058.7581e-056.1848e-050.0007571126.1849e-05 +
Error(Iter) 1.03474e-16 2.23046e-16 2.01273e-16 4.87455e-07 (48) 1.03553e-16 (2) 3.55965e-16 (2) 2.48189e-16 1.88808e-16 1.97976e-16 2.37248e-16 1.82701e-16 2.71474e-16 2.11322e-16 3.547e-09 (48) +
sherman1 1000 3750 Compute Time 0.002288050.002092310.005282689.846e-060.001635220.001621550.0007892590.0008044950.00438269 +
Solve Time 0.0002137889.7983e-050.0009388310.006298350.0003617640.000787944.3989e-052.5331e-050.000917166 +
Total Time 0.002501840.002190290.006221510.00630820.001996980.002409490.0008332480.0008298260.00529986 +
Error(Iter) 1.16839e-16 2.25968e-16 2.59116e-16 3.76779e-11 (248) 4.13343e-11 (4) 2.22347e-14 (10) 2.05861e-16 1.83555e-16 1.02917e-15 +
young1c 841 4089 Compute Time 0.002358430.002172280.005680751.2735e-050.002648660.00258236 +
Solve Time 0.0003295990.0001686340.000801180.05347380.001871930.00450211 +
Total Time 0.002688030.002340910.006481930.05348650.004520590.00708447 +
Error(Iter) 1.27029e-16 2.81321e-16 5.0492e-15 8.0507e-11 (706) 3.00447e-12 (8) 1.46532e-12 (16) +
mhd1280b 1280 22778 Compute Time 0.002348980.002070790.005709182.5976e-050.003025630.002980360.001445250.0009199220.00426444 +
Solve Time 0.001033920.0002119110.001050.01104320.0006282870.003920890.0001383036.2446e-050.00097564 +
Total Time 0.00338290.00228270.006759180.01106920.003653920.006901240.001583550.0009823680.00524008 +
Error(Iter) 1.32953e-16 3.08646e-16 6.734e-16 8.83132e-11 (40) 1.51153e-16 (1) 6.08556e-16 (8) 1.89264e-16 1.97477e-16 6.68126e-09 +
crashbasis 160000 1750416 Compute Time 3.20195.789215.75730.003835153.10063.09921 +
Solve Time 0.2619150.1062250.4021411.490890.248880.443673 +
Total Time 3.463815.8954216.15941.494733.349483.54288 +
Error(Iter) 1.76348e-16 4.58395e-16 1.67982e-14 8.64144e-11 (61) 8.5996e-12 (2) 6.04042e-14 (5) + +
+*/ +} diff --git a/thirdparty/eigen/doc/SparseQuickReference.dox b/thirdparty/eigen/doc/SparseQuickReference.dox new file mode 100644 index 000000000..a25622e80 --- /dev/null +++ b/thirdparty/eigen/doc/SparseQuickReference.dox @@ -0,0 +1,272 @@ +namespace Eigen { +/** \eigenManualPage SparseQuickRefPage Quick reference guide for sparse matrices +\eigenAutoToc + +
+ +In this page, we give a quick summary of the main operations available for sparse matrices in the class SparseMatrix. First, it is recommended to read the introductory tutorial at \ref TutorialSparse. The important point to have in mind when working on sparse matrices is how they are stored : +i.e either row major or column major. The default is column major. Most arithmetic operations on sparse matrices will assert that they have the same storage order. + +\section SparseMatrixInit Sparse Matrix Initialization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Category Operations Notes
Constructor +\code + SparseMatrix sm1(1000,1000); + SparseMatrix,RowMajor> sm2; +\endcode + Default is ColMajor
Resize/Reserve + \code + sm1.resize(m,n); // Change sm1 to a m x n matrix. + sm1.reserve(nnz); // Allocate room for nnz nonzeros elements. + \endcode + Note that when calling reserve(), it is not required that nnz is the exact number of nonzero elements in the final matrix. However, an exact estimation will avoid multiple reallocations during the insertion phase.
Assignment +\code + SparseMatrix sm1; + // Initialize sm2 with sm1. + SparseMatrix sm2(sm1), sm3; + // Assignment and evaluations modify the storage order. + sm3 = sm1; + \endcode + The copy constructor can be used to convert from a storage order to another
Element-wise Insertion +\code +// Insert a new element; + sm1.insert(i, j) = v_ij; + +// Update the value v_ij + sm1.coeffRef(i,j) = v_ij; + sm1.coeffRef(i,j) += v_ij; + sm1.coeffRef(i,j) -= v_ij; +\endcode + insert() assumes that the element does not already exist; otherwise, use coeffRef()
Batch insertion +\code + std::vector< Eigen::Triplet > tripletList; + tripletList.reserve(estimation_of_entries); + // -- Fill tripletList with nonzero elements... + sm1.setFromTriplets(tripletList.begin(), tripletList.end()); +\endcode +A complete example is available at \link TutorialSparseFilling Triplet Insertion \endlink.
Constant or Random Insertion +\code +sm1.setZero(); +\endcode +Remove all non-zero coefficients
+ + +\section SparseBasicInfos Matrix properties +Beyond the basic functions rows() and cols(), there are some useful functions that are available to easily get some information from the matrix. + + + + +
\code + sm1.rows(); // Number of rows + sm1.cols(); // Number of columns + sm1.nonZeros(); // Number of non zero values + sm1.outerSize(); // Number of columns (resp. rows) for a column major (resp. row major) + sm1.innerSize(); // Number of rows (resp. columns) for a column major (resp. row major) + sm1.norm(); // Euclidean norm of the matrix + sm1.squaredNorm(); // Squared norm of the matrix + sm1.blueNorm(); + sm1.isVector(); // Check if sm1 is a sparse vector or a sparse matrix + sm1.isCompressed(); // Check if sm1 is in compressed form + ... + \endcode
+ +\section SparseBasicOps Arithmetic operations +It is easy to perform arithmetic operations on sparse matrices provided that the dimensions are adequate and that the matrices have the same storage order. Note that the evaluation can always be done in a matrix with a different storage order. In the following, \b sm denotes a sparse matrix, \b dm a dense matrix and \b dv a dense vector. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Operations Code Notes
add subtract \code + sm3 = sm1 + sm2; + sm3 = sm1 - sm2; + sm2 += sm1; + sm2 -= sm1; \endcode + + sm1 and sm2 should have the same storage order +
+ scalar product\code + sm3 = sm1 * s1; sm3 *= s1; + sm3 = s1 * sm1 + s2 * sm2; sm3 /= s1;\endcode + + Many combinations are possible if the dimensions and the storage order agree. +
%Sparse %Product \code + sm3 = sm1 * sm2; + dm2 = sm1 * dm1; + dv2 = sm1 * dv1; + \endcode +
transposition, adjoint \code + sm2 = sm1.transpose(); + sm2 = sm1.adjoint(); + \endcode + Note that transposition changes the storage order. There is no support for transposeInPlace(). +
Permutation +\code +perm.indices(); // Reference to the vector of indices +sm1.twistedBy(perm); // Permute rows and columns +sm2 = sm1 * perm; // Permute the columns +sm2 = perm * sm1; // Permute the rows +\endcode + + +
+ Component-wise ops + \code + sm1.cwiseProduct(sm2); + sm1.cwiseQuotient(sm2); + sm1.cwiseMin(sm2); + sm1.cwiseMax(sm2); + sm1.cwiseAbs(); + sm1.cwiseSqrt(); + \endcode + sm1 and sm2 should have the same storage order +
+ +\section sparseotherops Other supported operations + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Code Notes
Sub-matrices
+\code + sm1.block(startRow, startCol, rows, cols); + sm1.block(startRow, startCol); + sm1.topLeftCorner(rows, cols); + sm1.topRightCorner(rows, cols); + sm1.bottomLeftCorner( rows, cols); + sm1.bottomRightCorner( rows, cols); + \endcode + +Contrary to dense matrices, here all these methods are read-only.\n +See \ref TutorialSparse_SubMatrices and below for read-write sub-matrices. +
Range
+\code + sm1.innerVector(outer); // RW + sm1.innerVectors(start, size); // RW + sm1.leftCols(size); // RW + sm2.rightCols(size); // RO because sm2 is row-major + sm1.middleRows(start, numRows); // RO because sm1 is column-major + sm1.middleCols(start, numCols); // RW + sm1.col(j); // RW +\endcode + +An inner vector is either a row (for row-major) or a column (for column-major).\n +As stated earlier, for a read-write sub-matrix (RW), the evaluation can be done in a matrix with different storage order. +
Triangular and selfadjoint views
+\code + sm2 = sm1.triangularView(); + sm2 = sm1.selfadjointView(); +\endcode + Several combinations of triangular views and block views are possible +\code + \endcode
Triangular solve
+\code + dv2 = sm1.triangularView().solve(dv1); + dv2 = sm1.topLeftCorner(size, size) + .triangularView().solve(dv1); +\endcode + For general sparse solve, use any suitable module described at \ref TopicSparseSystems
Low-level API
+\code +sm1.valuePtr(); // Pointer to the values +sm1.innerIndexPtr(); // Pointer to the indices. +sm1.outerIndexPtr(); // Pointer to the beginning of each inner vector +\endcode + +If the matrix is not in compressed form, makeCompressed() should be called before.\n +Note that these functions are mostly provided for interoperability purposes with external libraries.\n +A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section
Mapping external buffers
+\code +int outerIndexPtr[cols+1]; +int innerIndices[nnz]; +double values[nnz]; +Map > sm1(rows,cols,nnz,outerIndexPtr, // read-write + innerIndices,values); +Map > sm2(...); // read-only +\endcode +As for dense matrices, class Map can be used to see external buffers as an %Eigen's SparseMatrix object.
+*/ +} diff --git a/thirdparty/eigen/doc/StlContainers.dox b/thirdparty/eigen/doc/StlContainers.dox new file mode 100644 index 000000000..e0f8714a9 --- /dev/null +++ b/thirdparty/eigen/doc/StlContainers.dox @@ -0,0 +1,62 @@ +namespace Eigen { + +/** \eigenManualPage TopicStlContainers Using STL Containers with Eigen + +\eigenAutoToc + +\section StlContainers_summary Executive summary + +Using STL containers on \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types", or classes having members of such types, requires taking the following two steps: + +\li A 16-byte-aligned allocator must be used. Eigen does provide one ready for use: aligned_allocator. +\li If you want to use the std::vector container, you need to \#include . + +These issues arise only with \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types" and \ref TopicStructHavingEigenMembers "structures having such Eigen objects as member". For other Eigen types, such as Vector3f or MatrixXd, no special care is needed when using STL containers. + +\section allocator Using an aligned allocator + +STL containers take an optional template parameter, the allocator type. When using STL containers on \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types", you need tell the container to use an allocator that will always allocate memory at 16-byte-aligned locations. Fortunately, Eigen does provide such an allocator: Eigen::aligned_allocator. + +For example, instead of +\code +std::map +\endcode +you need to use +\code +std::map, + Eigen::aligned_allocator > > +\endcode +Note that the third parameter "std::less" is just the default value, but we have to include it because we want to specify the fourth parameter, which is the allocator type. + +\section StlContainers_vector The case of std::vector + +The situation with std::vector was even worse (explanation below) so we had to specialize it for the Eigen::aligned_allocator type. 
In practice you \b must use the Eigen::aligned_allocator (not another aligned allocator), \b and \#include . + +Here is an example: +\code +#include +/* ... */ +std::vector > +\endcode + +\subsection vector_spec An alternative - specializing std::vector for Eigen types + +As an alternative to the recommended approach described above, you have the option to specialize std::vector for Eigen types requiring alignment. +The advantage is that you won't need to declare std::vector all over with Eigen::aligned_allocator. One drawback, on the other hand, is that +the specialization needs to be defined before all code pieces in which e.g. std::vector is used. Otherwise, without knowing the specialization +the compiler will compile that particular instance with the default std::allocator and your program is most likely to crash. + +Here is an example: +\code +#include +/* ... */ +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Matrix2d) +std::vector +\endcode + +\b Explanation: The resize() method of std::vector takes a value_type argument (defaulting to value_type()). So with std::vector, some Eigen::Vector4f objects will be passed by value, which discards any alignment modifiers, so an Eigen::Vector4f can be created at an unaligned location. In order to avoid that, the only solution we saw was to specialize std::vector to make it work on a slight modification of, here, Eigen::Vector4f, that is able to deal properly with this situation. + + +*/ + +} diff --git a/thirdparty/eigen/doc/StorageOrders.dox b/thirdparty/eigen/doc/StorageOrders.dox new file mode 100644 index 000000000..61645313e --- /dev/null +++ b/thirdparty/eigen/doc/StorageOrders.dox @@ -0,0 +1,86 @@ +namespace Eigen { + +/** \eigenManualPage TopicStorageOrders Storage orders + +There are two different storage orders for matrices and two-dimensional arrays: column-major and row-major. +This page explains these storage orders and how to specify which one should be used. 
+ +\eigenAutoToc + + +\section TopicStorageOrdersIntro Column-major and row-major storage + +The entries of a matrix form a two-dimensional grid. However, when the matrix is stored in memory, the entries +have to somehow be laid out linearly. There are two main ways to do this, by row and by column. + +We say that a matrix is stored in \b row-major order if it is stored row by row. The entire first row is +stored first, followed by the entire second row, and so on. Consider for example the matrix + +\f[ +A = \begin{bmatrix} +8 & 2 & 2 & 9 \\ +9 & 1 & 4 & 4 \\ +3 & 5 & 4 & 5 +\end{bmatrix}. +\f] + +If this matrix is stored in row-major order, then the entries are laid out in memory as follows: + +\code 8 2 2 9 9 1 4 4 3 5 4 5 \endcode + +On the other hand, a matrix is stored in \b column-major order if it is stored column by column, starting with +the entire first column, followed by the entire second column, and so on. If the above matrix is stored in +column-major order, it is laid out as follows: + +\code 8 9 3 2 1 5 2 4 4 9 4 5 \endcode + +This example is illustrated by the following Eigen code. It uses the PlainObjectBase::data() function, which +returns a pointer to the memory location of the first entry of the matrix. + + + + +
ExampleOutput
+\include TopicStorageOrders_example.cpp + +\verbinclude TopicStorageOrders_example.out +
+ + +\section TopicStorageOrdersInEigen Storage orders in Eigen + +The storage order of a matrix or a two-dimensional array can be set by specifying the \c Options template +parameter for Matrix or Array. As \ref TutorialMatrixClass explains, the %Matrix class template has six +template parameters, of which three are compulsory (\c Scalar, \c RowsAtCompileTime and \c ColsAtCompileTime) +and three are optional (\c Options, \c MaxRowsAtCompileTime and \c MaxColsAtCompileTime). If the \c Options +parameter is set to \c RowMajor, then the matrix or array is stored in row-major order; if it is set to +\c ColMajor, then it is stored in column-major order. This mechanism is used in the above Eigen program to +specify the storage order. + +If the storage order is not specified, then Eigen defaults to storing the entry in column-major. This is also +the case if one of the convenience typedefs (\c Matrix3f, \c ArrayXXd, etc.) is used. + +Matrices and arrays using one storage order can be assigned to matrices and arrays using the other storage +order, as happens in the above program when \c Arowmajor is initialized using \c Acolmajor. Eigen will reorder +the entries automatically. More generally, row-major and column-major matrices can be mixed in an expression +as we want. + + +\section TopicStorageOrdersWhich Which storage order to choose? + +So, which storage order should you use in your program? There is no simple answer to this question; it depends +on your application. Here are some points to keep in mind: + + - Your users may expect you to use a specific storage order. Alternatively, you may use other libraries than + Eigen, and these other libraries may expect a certain storage order. In these cases it may be easiest and + fastest to use this storage order in your whole program. + - Algorithms that traverse a matrix row by row will go faster when the matrix is stored in row-major order + because of better data locality. 
Similarly, column-by-column traversal is faster for column-major + matrices. It may be worthwhile to experiment a bit to find out what is faster for your particular + application. + - The default in Eigen is column-major. Naturally, most of the development and testing of the Eigen library + is thus done with column-major matrices. This means that, even though we aim to support column-major and + row-major storage orders transparently, the Eigen library may well work best with column-major matrices. + +*/ +} diff --git a/thirdparty/eigen/doc/StructHavingEigenMembers.dox b/thirdparty/eigen/doc/StructHavingEigenMembers.dox new file mode 100644 index 000000000..7fbed0eb0 --- /dev/null +++ b/thirdparty/eigen/doc/StructHavingEigenMembers.dox @@ -0,0 +1,190 @@ +namespace Eigen { + +/** \eigenManualPage TopicStructHavingEigenMembers Structures Having Eigen Members + +\eigenAutoToc + +\section StructHavingEigenMembers_summary Executive Summary + +If you define a structure having members of \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types", you must overload its "operator new" so that it generates 16-bytes-aligned pointers. Fortunately, %Eigen provides you with a macro EIGEN_MAKE_ALIGNED_OPERATOR_NEW that does that for you. + +\section StructHavingEigenMembers_what What kind of code needs to be changed? + +The kind of code that needs to be changed is this: + +\code +class Foo +{ + ... + Eigen::Vector2d v; + ... +}; + +... + +Foo *foo = new Foo; +\endcode + +In other words: you have a class that has as a member a \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen object", and then you dynamically create an object of that class. + +\section StructHavingEigenMembers_how How should such code be modified? + +Very easy, you just need to put a EIGEN_MAKE_ALIGNED_OPERATOR_NEW macro in a public part of your class, like this: + +\code +class Foo +{ + ... + Eigen::Vector2d v; + ... +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW +}; + +... 
+ +Foo *foo = new Foo; +\endcode + +This macro makes "new Foo" always return an aligned pointer. + +If this approach is too intrusive, see also the \ref StructHavingEigenMembers_othersolutions "other solutions". + +\section StructHavingEigenMembers_why Why is this needed? + +OK let's say that your code looks like this: + +\code +class Foo +{ + ... + Eigen::Vector2d v; + ... +}; + +... + +Foo *foo = new Foo; +\endcode + +A Eigen::Vector2d consists of 2 doubles, which is 128 bits. Which is exactly the size of a SSE packet, which makes it possible to use SSE for all sorts of operations on this vector. But SSE instructions (at least the ones that %Eigen uses, which are the fast ones) require 128-bit alignment. Otherwise you get a segmentation fault. + +For this reason, Eigen takes care by itself to require 128-bit alignment for Eigen::Vector2d, by doing two things: +\li Eigen requires 128-bit alignment for the Eigen::Vector2d's array (of 2 doubles). With GCC, this is done with a __attribute__ ((aligned(16))). +\li Eigen overloads the "operator new" of Eigen::Vector2d so it will always return 128-bit aligned pointers. + +Thus, normally, you don't have to worry about anything, Eigen handles alignment for you... + +... except in one case. When you have a class Foo like above, and you dynamically allocate a new Foo as above, then, since Foo doesn't have aligned "operator new", the returned pointer foo is not necessarily 128-bit aligned. + +The alignment attribute of the member v is then relative to the start of the class, foo. If the foo pointer wasn't aligned, then foo->v won't be aligned either! + +The solution is to let class Foo have an aligned "operator new", as we showed in the previous section. + +\section StructHavingEigenMembers_movetotop Should I then put all the members of Eigen types at the beginning of my class? + +That's not required. 
Since Eigen takes care of declaring 128-bit alignment, all members that need it are automatically 128-bit aligned relative to the class. So code like this works fine: + +\code +class Foo +{ + double x; + Eigen::Vector2d v; +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW +}; +\endcode + +\section StructHavingEigenMembers_dynamicsize What about dynamic-size matrices and vectors? + +Dynamic-size matrices and vectors, such as Eigen::VectorXd, allocate dynamically their own array of coefficients, so they take care of requiring absolute alignment automatically. So they don't cause this issue. The issue discussed here is only with \ref TopicFixedSizeVectorizable "fixed-size vectorizable matrices and vectors". + +\section StructHavingEigenMembers_bugineigen So is this a bug in Eigen? + +No, it's not our bug. It's more like an inherent problem of the C++98 language specification, and seems to be taken care of in the upcoming language revision: see this document. + +\section StructHavingEigenMembers_conditional What if I want to do this conditionally (depending on template parameters)? + +For this situation, we offer the macro EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign). It will generate aligned operators like EIGEN_MAKE_ALIGNED_OPERATOR_NEW if NeedsToAlign is true. It will generate operators with the default alignment if NeedsToAlign is false. + +Example: + +\code +template<int n> class Foo +{ + typedef Eigen::Matrix<float,n,1> Vector; + enum { NeedsToAlign = (sizeof(Vector)%16)==0 }; + ... + Vector v; + ... +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) +}; + +... + +Foo<4> *foo4 = new Foo<4>; // foo4 is guaranteed to be 128bit-aligned +Foo<3> *foo3 = new Foo<3>; // foo3 has only the system default alignment guarantee +\endcode + + +\section StructHavingEigenMembers_othersolutions Other solutions + +In case putting the EIGEN_MAKE_ALIGNED_OPERATOR_NEW macro everywhere is too intrusive, there exist at least two other solutions.
+ +\subsection othersolutions1 Disabling alignment + +The first is to disable the alignment requirement for the fixed size members: +\code +class Foo +{ + ... + Eigen::Matrix<double,2,1,Eigen::DontAlign> v; + ... +}; +\endcode +This has the effect of disabling vectorization when using \c v. +If a function of Foo uses it several times, then it is still possible to re-enable vectorization by copying it into an aligned temporary vector: +\code +void Foo::bar() +{ + Eigen::Vector2d av(v); + // use av instead of v + ... + // if av changed, then do: + v = av; +} +\endcode + +\subsection othersolutions2 Private structure + +The second consists in storing the fixed-size objects into a private struct which will be dynamically allocated at the construction time of the main object: + +\code +struct Foo_d +{ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + Vector2d v; + ... +}; + + +struct Foo { + Foo() { init_d(); } + ~Foo() { delete d; } + void bar() + { + // use d->v instead of v + ... + } +private: + void init_d() { d = new Foo_d; } + Foo_d* d; +}; +\endcode + +The clear advantage here is that the class Foo remains unchanged regarding alignment issues. The drawback is that a heap allocation will be required in all cases. + +*/ + +} diff --git a/thirdparty/eigen/doc/TemplateKeyword.dox b/thirdparty/eigen/doc/TemplateKeyword.dox new file mode 100644 index 000000000..b84cfdae9 --- /dev/null +++ b/thirdparty/eigen/doc/TemplateKeyword.dox @@ -0,0 +1,133 @@ +namespace Eigen { + +/** \page TopicTemplateKeyword The template and typename keywords in C++ + +There are two uses for the \c template and \c typename keywords in C++. One of them is fairly well known +amongst programmers: to define templates. The other use is more obscure: to specify that an expression refers +to a template function or a type. This regularly trips up programmers that use the %Eigen library, often +leading to error messages from the compiler that are difficult to understand, such as "expected expression" or +"no match for operator<".
+ +\eigenAutoToc + + +\section TopicTemplateKeywordToDefineTemplates Using the template and typename keywords to define templates + +The \c template and \c typename keywords are routinely used to define templates. This is not the topic of this +page as we assume that the reader is aware of this (otherwise consult a C++ book). The following example +should illustrate this use of the \c template keyword. + +\code +template +bool isPositive(T x) +{ + return x > 0; +} +\endcode + +We could just as well have written template <class T>; the keywords \c typename and \c class have the +same meaning in this context. + + +\section TopicTemplateKeywordExample An example showing the second use of the template keyword + +Let us illustrate the second use of the \c template keyword with an example. Suppose we want to write a +function which copies all entries in the upper triangular part of a matrix into another matrix, while keeping +the lower triangular part unchanged. A straightforward implementation would be as follows: + + + + +
Example:Output:
+\include TemplateKeyword_simple.cpp + +\verbinclude TemplateKeyword_simple.out +
+ +That works fine, but it is not very flexible. First, it only works with dynamic-size matrices of +single-precision floats; the function \c copyUpperTriangularPart() does not accept static-size matrices or +matrices with double-precision numbers. Second, if you use an expression such as +mat.topLeftCorner(3,3) as the parameter \c src, then this is copied into a temporary variable of type +MatrixXf; this copy can be avoided. + +As explained in \ref TopicFunctionTakingEigenTypes, both issues can be resolved by making +\c copyUpperTriangularPart() accept any object of type MatrixBase. This leads to the following code: + + + + +
Example:Output:
+\include TemplateKeyword_flexible.cpp + +\verbinclude TemplateKeyword_flexible.out +
+ +The one line in the body of the function \c copyUpperTriangularPart() shows the second, more obscure use of +the \c template keyword in C++. Even though it may look strange, the \c template keywords are necessary +according to the standard. Without it, the compiler may reject the code with an error message like "no match +for operator<". + + +\section TopicTemplateKeywordExplanation Explanation + +The reason that the \c template keyword is necessary in the last example has to do with the rules for how +templates are supposed to be compiled in C++. The compiler has to check the code for correct syntax at the +point where the template is defined, without knowing the actual value of the template arguments (\c Derived1 +and \c Derived2 in the example). That means that the compiler cannot know that dst.triangularView is +a member template and that the following < symbol is part of the delimiter for the template +parameter. Another possibility would be that dst.triangularView is a member variable with the < +symbol referring to the operator<() function. In fact, the compiler should choose the second +possibility, according to the standard. If dst.triangularView is a member template (as in our case), +the programmer should specify this explicitly with the \c template keyword and write dst.template +triangularView. + +The precise rules are rather complicated, but ignoring some subtleties we can summarize them as follows: +- A dependent name is a name that depends (directly or indirectly) on a template parameter. In the + example, \c dst is a dependent name because it is of type MatrixBase<Derived1> which depends + on the template parameter \c Derived1. +- If the code contains either one of the constructs xxx.yyy or xxx->yyy and \c xxx is a + dependent name and \c yyy refers to a member template, then the \c template keyword must be used before + \c yyy, leading to xxx.template yyy or xxx->template yyy.
+- If the code contains the construct xxx::yyy and \c xxx is a dependent name and \c yyy refers to a + member typedef, then the \c typename keyword must be used before the whole construct, leading to + typename xxx::yyy. + +As an example where the \c typename keyword is required, consider the following code in \ref TutorialSparse +for iterating over the non-zero entries of a sparse matrix type: + +\code +SparseMatrixType mat(rows,cols); +for (int k=0; k<mat.outerSize(); ++k) + for (SparseMatrixType::InnerIterator it(mat,k); it; ++it) + { + /* ... */ + } +\endcode + +If \c SparseMatrixType depends on a template parameter, then the \c typename keyword is required: + +\code +template <typename T> +void iterateOverSparseMatrix(const SparseMatrix<T>& mat) +{ + for (int k=0; k<mat.outerSize(); ++k) + for (typename SparseMatrix<T>::InnerIterator it(mat,k); it; ++it) + { + /* ... */ + } +} +\endcode + + +\section TopicTemplateKeywordResources Resources for further reading + +For more information and a fuller explanation of this topic, the reader may consult the following sources: +- The book "C++ Template Metaprogramming" by David Abrahams and Aleksey Gurtovoy contains a very good + explanation in Appendix B ("The typename and template Keywords") which formed the basis for this page. +- http://pages.cs.wisc.edu/~driscoll/typename.html +- http://www.parashift.com/c++-faq-lite/templates.html#faq-35.18 +- http://www.comeaucomputing.com/techtalk/templates/#templateprefix +- http://www.comeaucomputing.com/techtalk/templates/#typename + +*/ +} diff --git a/thirdparty/eigen/doc/TopicAliasing.dox b/thirdparty/eigen/doc/TopicAliasing.dox new file mode 100644 index 000000000..a8f164428 --- /dev/null +++ b/thirdparty/eigen/doc/TopicAliasing.dox @@ -0,0 +1,237 @@ +namespace Eigen { + +/** \eigenManualPage TopicAliasing Aliasing + +In %Eigen, aliasing refers to an assignment statement in which the same matrix (or array or vector) appears on the +left and on the right of the assignment operator. Statements like mat = 2 * mat; or mat = +mat.transpose(); exhibit aliasing. The aliasing in the first example is harmless, but the aliasing in the +second example leads to unexpected results. This page explains what aliasing is, when it is harmful, and what +to do about it.
+ +\eigenAutoToc + + +\section TopicAliasingExamples Examples + +Here is a simple example exhibiting aliasing: + + + + +
ExampleOutput
+\include TopicAliasing_block.cpp + +\verbinclude TopicAliasing_block.out +
+ +The output is not what one would expect. The problem is the assignment +\code +mat.bottomRightCorner(2,2) = mat.topLeftCorner(2,2); +\endcode +This assignment exhibits aliasing: the coefficient \c mat(1,1) appears both in the block +mat.bottomRightCorner(2,2) on the left-hand side of the assignment and the block +mat.topLeftCorner(2,2) on the right-hand side. After the assignment, the (2,2) entry in the bottom +right corner should have the value of \c mat(1,1) before the assignment, which is 5. However, the output shows +that \c mat(2,2) is actually 1. The problem is that %Eigen uses lazy evaluation (see +\ref TopicEigenExpressionTemplates) for mat.topLeftCorner(2,2). The result is similar to +\code +mat(1,1) = mat(0,0); +mat(1,2) = mat(0,1); +mat(2,1) = mat(1,0); +mat(2,2) = mat(1,1); +\endcode +Thus, \c mat(2,2) is assigned the \e new value of \c mat(1,1) instead of the old value. The next section +explains how to solve this problem by calling \link DenseBase::eval() eval()\endlink. + +Aliasing occurs more naturally when trying to shrink a matrix. For example, the expressions vec = +vec.head(n) and mat = mat.block(i,j,r,c) exhibit aliasing. + +In general, aliasing cannot be detected at compile time: if \c mat in the first example were a bit bigger, +then the blocks would not overlap, and there would be no aliasing problem. However, %Eigen does detect some +instances of aliasing, albeit at run time. The following example exhibiting aliasing was mentioned in \ref +TutorialMatrixArithmetic : + + + + +
ExampleOutput
+\include tut_arithmetic_transpose_aliasing.cpp + +\verbinclude tut_arithmetic_transpose_aliasing.out +
+ +Again, the output shows the aliasing issue. However, by default %Eigen uses a run-time assertion to detect this +and exits with a message like + +\verbatim +void Eigen::DenseBase::checkTransposeAliasing(const OtherDerived&) const +[with OtherDerived = Eigen::Transpose >, Derived = Eigen::Matrix]: +Assertion `(!internal::check_transpose_aliasing_selector::IsTransposed,OtherDerived>::run(internal::extract_data(derived()), other)) +&& "aliasing detected during transposition, use transposeInPlace() or evaluate the rhs into a temporary using .eval()"' failed. +\endverbatim + +The user can turn %Eigen's run-time assertions like the one to detect this aliasing problem off by defining the +EIGEN_NO_DEBUG macro, and the above program was compiled with this macro turned off in order to illustrate the +aliasing problem. See \ref TopicAssertions for more information about %Eigen's run-time assertions. + + +\section TopicAliasingSolution Resolving aliasing issues + +If you understand the cause of the aliasing issue, then it is obvious what must happen to solve it: %Eigen has +to evaluate the right-hand side fully into a temporary matrix/array and then assign it to the left-hand +side. The function \link DenseBase::eval() eval() \endlink does precisely that. + +For example, here is the corrected version of the first example above: + + + + +
ExampleOutput
+\include TopicAliasing_block_correct.cpp + +\verbinclude TopicAliasing_block_correct.out +
+ +Now, \c mat(2,2) equals 5 after the assignment, as it should be. + +The same solution also works for the second example, with the transpose: simply replace the line +a = a.transpose(); with a = a.transpose().eval();. However, in this common case there is a +better solution. %Eigen provides the special-purpose function +\link DenseBase::transposeInPlace() transposeInPlace() \endlink which replaces a matrix by its transpose. +This is shown below: + + + + +
ExampleOutput
+\include tut_arithmetic_transpose_inplace.cpp + +\verbinclude tut_arithmetic_transpose_inplace.out +
+ +If an xxxInPlace() function is available, then it is best to use it, because it indicates more clearly what you +are doing. This may also allow %Eigen to optimize more aggressively. These are some of the xxxInPlace() +functions provided: + + + + + + + + + +
Original functionIn-place function
MatrixBase::adjoint() MatrixBase::adjointInPlace()
DenseBase::reverse() DenseBase::reverseInPlace()
LDLT::solve() LDLT::solveInPlace()
LLT::solve() LLT::solveInPlace()
TriangularView::solve() TriangularView::solveInPlace()
DenseBase::transpose() DenseBase::transposeInPlace()
+ +In the special case where a matrix or vector is shrunk using an expression like vec = vec.head(n), +you can use \link PlainObjectBase::conservativeResize() conservativeResize() \endlink. + + +\section TopicAliasingCwise Aliasing and component-wise operations + +As explained above, it may be dangerous if the same matrix or array occurs on both the left-hand side and the +right-hand side of an assignment operator, and it is then often necessary to evaluate the right-hand side +explicitly. However, applying component-wise operations (such as matrix addition, scalar multiplication and +array multiplication) is safe. + +The following example has only component-wise operations. Thus, there is no need for \link DenseBase::eval() +eval() \endlink even though the same matrix appears on both sides of the assignments. + + + + +
ExampleOutput
+\include TopicAliasing_cwise.cpp + +\verbinclude TopicAliasing_cwise.out +
+ +In general, an assignment is safe if the (i,j) entry of the expression on the right-hand side depends only on +the (i,j) entry of the matrix or array on the left-hand side and not on any other entries. In that case it is +not necessary to evaluate the right-hand side explicitly. + + +\section TopicAliasingMatrixMult Aliasing and matrix multiplication + +Matrix multiplication is the only operation in %Eigen that assumes aliasing by default, under the +condition that the destination matrix is not resized. +Thus, if \c matA is a \b squared matrix, then the statement matA = matA * matA; is safe. +All other operations in %Eigen assume that there are no aliasing problems, +either because the result is assigned to a different matrix or because it is a component-wise operation. + + + + +
ExampleOutput
+\include TopicAliasing_mult1.cpp + +\verbinclude TopicAliasing_mult1.out +
+ +However, this comes at a price. When executing the expression matA = matA * matA, %Eigen evaluates the +product in a temporary matrix which is assigned to \c matA after the computation. This is fine. But %Eigen does +the same when the product is assigned to a different matrix (e.g., matB = matA * matA). In that case, +it is more efficient to evaluate the product directly into \c matB instead of evaluating it first into a +temporary matrix and copying that matrix to \c matB. + +The user can indicate with the \link MatrixBase::noalias() noalias()\endlink function that there is no +aliasing, as follows: matB.noalias() = matA * matA. This allows %Eigen to evaluate the matrix product +matA * matA directly into \c matB. + + + + +
ExampleOutput
+\include TopicAliasing_mult2.cpp + +\verbinclude TopicAliasing_mult2.out +
+ +Of course, you should not use \c noalias() when there is in fact aliasing taking place. If you do, then you +may get wrong results: + + + + +
ExampleOutput
+\include TopicAliasing_mult3.cpp + +\verbinclude TopicAliasing_mult3.out +
+ +Moreover, starting in Eigen 3.3, aliasing is \b not assumed if the destination matrix is resized and the product is not directly assigned to the destination. +Therefore, the following example is also wrong: + + + + +
ExampleOutput
+\include TopicAliasing_mult4.cpp + +\verbinclude TopicAliasing_mult4.out +
+ +As for any aliasing issue, you can resolve it by explicitly evaluating the expression prior to assignment: + + + +
ExampleOutput
+\include TopicAliasing_mult5.cpp + +\verbinclude TopicAliasing_mult5.out +
+ +\section TopicAliasingSummary Summary + +Aliasing occurs when the same matrix or array coefficients appear both on the left- and the right-hand side of +an assignment operator. + - Aliasing is harmless with coefficient-wise computations; this includes scalar multiplication and matrix or + array addition. + - When you multiply two matrices, %Eigen assumes that aliasing occurs. If you know that there is no aliasing, + then you can use \link MatrixBase::noalias() noalias()\endlink. + - In all other situations, %Eigen assumes that there is no aliasing issue and thus gives the wrong result if + aliasing does in fact occur. To prevent this, you have to use \link DenseBase::eval() eval() \endlink or + one of the xxxInPlace() functions. + +*/ +} diff --git a/thirdparty/eigen/doc/TopicAssertions.dox b/thirdparty/eigen/doc/TopicAssertions.dox new file mode 100644 index 000000000..c8b4d84f2 --- /dev/null +++ b/thirdparty/eigen/doc/TopicAssertions.dox @@ -0,0 +1,108 @@ +namespace Eigen { + +/** \page TopicAssertions Assertions + +\eigenAutoToc + +\section PlainAssert Assertions + +The macro eigen_assert is defined to be \c eigen_plain_assert by default. We use eigen_plain_assert instead of \c assert to work around a known bug for GCC <= 4.3. Basically, eigen_plain_assert \a is \c assert. + +\subsection RedefineAssert Redefining assertions + +Both eigen_assert and eigen_plain_assert are defined in Macros.h. Defining eigen_assert indirectly gives you a chance to change its behavior. You can redefine this macro if you want to do something else such as throwing an exception, and fall back to its default behavior with eigen_plain_assert. The code below tells Eigen to throw an std::runtime_error: + +\code +#include <stdexcept> +#undef eigen_assert +#define eigen_assert(x) \ + if (!(x)) { throw (std::runtime_error("Put your message here")); } +\endcode + +\subsection DisableAssert Disabling assertions + +Assertions cost run time and can be turned off.
You can suppress eigen_assert by defining \c EIGEN_NO_DEBUG \b before including Eigen headers. \c EIGEN_NO_DEBUG is undefined by default unless \c NDEBUG is defined. + +\section StaticAssert Static assertions + +Static assertions are not standardized until C++11. However, in the Eigen library, there are many conditions that can and should be detected at compile time. For instance, we use static assertions to prevent the code below from compiling. + +\code +Matrix3d() + Matrix4d(); // adding matrices of different sizes +Matrix4cd() * Vector3cd(); // invalid product known at compile time +\endcode + +Static assertions are defined in StaticAssert.h. If there is a native static_assert, we use it. Otherwise, we have implemented an assertion macro that can show a limited range of messages. + +One can easily come up with static assertions without messages, such as: + +\code +#define STATIC_ASSERT(x) \ + switch(0) { case 0: case x:; } +\endcode + +However, the example above obviously cannot tell why the assertion failed. Therefore, we define a \c struct in namespace Eigen::internal to handle available messages. + +\code +template<bool condition> +struct static_assertion {}; + +template<> +struct static_assertion<true> +{ + enum { + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, + // see StaticAssert.h for all enums. + }; +}; +\endcode + +And then, we define EIGEN_STATIC_ASSERT(CONDITION,MSG) to access Eigen::internal::static_assertion<bool(CONDITION)>::MSG. If the condition evaluates to \c false, your compiler displays a lot of messages explaining there is no MSG in static_assert. Nevertheless, this is \a not what we are interested in. As you can see, all members of static_assert are ALL_CAPS_AND_THEY_ARE_SHOUTING. + +\warning +When using this macro, MSG should be a member of static_assertion, or the static assertion \b always fails. +Currently, it can only be used in function scope.
+ +\subsection DerivedStaticAssert Derived static assertions + +There are other macros derived from EIGEN_STATIC_ASSERT to enhance readability. Their names are self-explanatory. + +- \b EIGEN_STATIC_ASSERT_FIXED_SIZE(TYPE) - passes if \a TYPE is fixed size. +- \b EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(TYPE) - passes if \a TYPE is dynamic size. +- \b EIGEN_STATIC_ASSERT_LVALUE(Derived) - fails if \a Derived is read-only. +- \b EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) - passes if \a Derived is an array expression. +- EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) - fails if the two expressions are an array one and a matrix one. + +Because Eigen handles both fixed-size and dynamic-size expressions, some conditions cannot be clearly determined at compile time. We classify them into strict assertions and permissive assertions. + +\subsubsection StrictAssertions Strict assertions + +These assertions fail if the condition may not be met. For example, MatrixXd may not be a vector, so it fails EIGEN_STATIC_ASSERT_VECTOR_ONLY. + +- \b EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) - passes if \a TYPE must be a vector type. +- EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(TYPE, SIZE) - passes if \a TYPE must be a vector of the given size. +- EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(TYPE, ROWS, COLS) - passes if \a TYPE must be a matrix with given rows and columns. + +\subsubsection PermissiveAssertions Permissive assertions + +These assertions fail if the condition \b cannot be met. For example, MatrixXd and Matrix4d may have the same size, so they pass EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE. + +- \b EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(TYPE0,TYPE1) - fails if the two vector expression types must have different sizes. +- \b EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(TYPE0,TYPE1) - fails if the two matrix expression types must have different sizes. +- \b EIGEN_STATIC_ASSERT_SIZE_1x1(TYPE) - fails if \a TYPE cannot be a 1x1 expression.
+ +See StaticAssert.h for details such as what messages they throw. + +\subsection DisableStaticAssert Disabling static assertions + +If \c EIGEN_NO_STATIC_ASSERT is defined, static assertions turn into eigen_assert's, working like: + +\code +#define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); +\endcode + +This saves compile time but consumes more run time. \c EIGEN_NO_STATIC_ASSERT is undefined by default. + +*/ +} diff --git a/thirdparty/eigen/doc/TopicEigenExpressionTemplates.dox b/thirdparty/eigen/doc/TopicEigenExpressionTemplates.dox new file mode 100644 index 000000000..b31fd47f9 --- /dev/null +++ b/thirdparty/eigen/doc/TopicEigenExpressionTemplates.dox @@ -0,0 +1,12 @@ +namespace Eigen { + +/** \page TopicEigenExpressionTemplates Expression templates in Eigen + + +TODO: write this dox page! + +Is linked from the tutorial on arithmetic ops. + +*/ + +} diff --git a/thirdparty/eigen/doc/TopicLazyEvaluation.dox b/thirdparty/eigen/doc/TopicLazyEvaluation.dox new file mode 100644 index 000000000..101ef8c72 --- /dev/null +++ b/thirdparty/eigen/doc/TopicLazyEvaluation.dox @@ -0,0 +1,65 @@ +namespace Eigen { + +/** \page TopicLazyEvaluation Lazy Evaluation and Aliasing + +Executive summary: Eigen has intelligent compile-time mechanisms to enable lazy evaluation and removing temporaries where appropriate. +It will handle aliasing automatically in most cases, for example with matrix products. The automatic behavior can be overridden +manually by using the MatrixBase::eval() and MatrixBase::noalias() methods. + +When you write a line of code involving a complex expression such as + +\code mat1 = mat2 + mat3 * (mat4 + mat5); \endcode + +Eigen determines automatically, for each sub-expression, whether to evaluate it into a temporary variable. Indeed, in certain cases it is better to evaluate immediately a sub-expression into a temporary variable, while in other cases it is better to avoid that. 
+ +A traditional math library without expression templates always evaluates all sub-expressions into temporaries. So with this code, + +\code vec1 = vec2 + vec3; \endcode + +a traditional library would evaluate \c vec2 + vec3 into a temporary \c vec4 and then copy \c vec4 into \c vec1. This is of course inefficient: the arrays are traversed twice, so there are a lot of useless load/store operations. + +Expression-templates-based libraries can avoid evaluating sub-expressions into temporaries, which in many cases results in large speed improvements. This is called lazy evaluation as an expression is getting evaluated as late as possible, instead of immediately. However, most other expression-templates-based libraries always choose lazy evaluation. There are two problems with that: first, lazy evaluation is not always a good choice for performance; second, lazy evaluation can be very dangerous, for example with matrix products: doing matrix = matrix*matrix gives a wrong result if the matrix product is lazy-evaluated, because of the way matrix product works. + +For these reasons, Eigen has intelligent compile-time mechanisms to determine automatically when to use lazy evaluation, and when on the contrary it should evaluate immediately into a temporary variable. + +So in the basic example, + +\code matrix1 = matrix2 + matrix3; \endcode + +Eigen chooses lazy evaluation. Thus the arrays are traversed only once, producing optimized code. If you really want to force immediate evaluation, use \link MatrixBase::eval() eval()\endlink: + +\code matrix1 = (matrix2 + matrix3).eval(); \endcode + +Here is now a more involved example: + +\code matrix1 = -matrix2 + matrix3 + 5 * matrix4; \endcode + +Eigen chooses lazy evaluation at every stage in that example, which is clearly the correct choice. In fact, lazy evaluation is the "default choice" and Eigen will choose it except in a few circumstances. 
+ +The first circumstance in which Eigen chooses immediate evaluation, is when it sees an assignment a = b; and the expression \c b has the evaluate-before-assigning \link flags flag\endlink. The most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do + +\code matrix = matrix * matrix; \endcode + +Eigen first evaluates matrix * matrix into a temporary matrix, and then copies it into the original \c matrix. This guarantees a correct result as we saw above that lazy evaluation gives wrong results with matrix products. It also doesn't cost much, as the cost of the matrix product itself is much higher. + +What if you know that the result does not alias the operand of the product and want to force lazy evaluation? Then use \link MatrixBase::noalias() .noalias()\endlink instead. Here is an example: + +\code matrix1.noalias() = matrix2 * matrix2; \endcode + +Here, since we know that matrix2 is not the same matrix as matrix1, we know that lazy evaluation is not dangerous, so we may force lazy evaluation. Concretely, the effect of noalias() here is to bypass the evaluate-before-assigning \link flags flag\endlink. + +The second circumstance in which Eigen chooses immediate evaluation, is when it sees a nested expression such as a + b where \c b is already an expression having the evaluate-before-nesting \link flags flag\endlink. Again, the most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do + +\code matrix1 = matrix2 + matrix3 * matrix4; \endcode + +the product matrix3 * matrix4 gets evaluated immediately into a temporary matrix. Indeed, experiments showed that it is often beneficial for performance to evaluate immediately matrix products when they are nested into bigger expressions.
+ +The third circumstance in which Eigen chooses immediate evaluation, is when its cost model shows that the total cost of an operation is reduced if a sub-expression gets evaluated into a temporary. Indeed, in certain cases, an intermediate result is sufficiently costly to compute and is reused sufficiently many times, that it is worth "caching". Here is an example: + +\code matrix1 = matrix2 * (matrix3 + matrix4); \endcode + +Here, provided the matrices have at least 2 rows and 2 columns, each coefficient of the expression matrix3 + matrix4 is going to be used several times in the matrix product. Instead of computing the sum every time, it is much better to compute it once and store it in a temporary variable. Eigen understands this and evaluates matrix3 + matrix4 into a temporary variable before evaluating the product. + +*/ + +} diff --git a/thirdparty/eigen/doc/TopicLinearAlgebraDecompositions.dox b/thirdparty/eigen/doc/TopicLinearAlgebraDecompositions.dox new file mode 100644 index 000000000..491470627 --- /dev/null +++ b/thirdparty/eigen/doc/TopicLinearAlgebraDecompositions.dox @@ -0,0 +1,263 @@ +namespace Eigen { + +/** \eigenManualPage TopicLinearAlgebraDecompositions Catalogue of dense decompositions + +This page presents a catalogue of the dense matrix decompositions offered by Eigen. +For an introduction on linear solvers and decompositions, check this \link TutorialLinearAlgebra page \endlink. +To get an overview of the true relative speed of the different decompositions, check this \link DenseDecompositionBenchmark benchmark \endlink.
+ +\section TopicLinAlgBigTable Catalogue of decompositions offered by Eigen + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Generic information, not Eigen-specificEigen-specific
DecompositionRequirements on the matrixSpeedAlgorithm reliability and accuracyRank-revealingAllows to compute (besides linear solving)Linear solver provided by EigenMaturity of Eigen's implementationOptimizations
PartialPivLUInvertibleFastDepends on condition number--YesExcellentBlocking, Implicit MT
FullPivLU-SlowProvenYes-YesExcellent-
HouseholderQR-FastDepends on condition number-OrthogonalizationYesExcellentBlocking
ColPivHouseholderQR-FastGoodYesOrthogonalizationYesExcellentSoon: blocking
FullPivHouseholderQR-SlowProvenYesOrthogonalizationYesAverage-
LLTPositive definiteVery fastDepends on condition number--YesExcellentBlocking
LDLTPositive or negative semidefinite1Very fastGood--YesExcellentSoon: blocking
\n Singular values and eigenvalues decompositions
JacobiSVD (two-sided)-Slow (but fast for small matrices)Proven3YesSingular values/vectors, least squaresYes (and does least squares)ExcellentR-SVD
SelfAdjointEigenSolverSelf-adjointFast-average2GoodYesEigenvalues/vectors-ExcellentClosed forms for 2x2 and 3x3
ComplexEigenSolverSquareSlow-very slow2Depends on condition numberYesEigenvalues/vectors-Average-
EigenSolverSquare and realAverage-slow2Depends on condition numberYesEigenvalues/vectors-Average-
GeneralizedSelfAdjointEigenSolverSquareFast-average2Depends on condition number-Generalized eigenvalues/vectors-Good-
\n Helper decompositions
RealSchurSquare and realAverage-slow2Depends on condition numberYes--Average-
ComplexSchurSquareSlow-very slow2Depends on condition numberYes--Average-
TridiagonalizationSelf-adjointFastGood---GoodSoon: blocking
HessenbergDecompositionSquareAverageGood---GoodSoon: blocking
+ +\b Notes: +
    +
  • \b 1: There exist two variants of the LDLT algorithm. Eigen's one produces a pure diagonal D matrix, and therefore it cannot handle indefinite matrices, unlike Lapack's one which produces a block diagonal D matrix.
  • +
  • \b 2: Eigenvalues, SVD and Schur decompositions rely on iterative algorithms. Their convergence speed depends on how well the eigenvalues are separated.
  • +
  • \b 3: Our JacobiSVD is two-sided, making for proven and optimal precision for square matrices. For non-square matrices, we have to use a QR preconditioner first. The default choice, ColPivHouseholderQR, is already very reliable, but if you want it to be proven, use FullPivHouseholderQR instead. +
+ +\section TopicLinAlgTerminology Terminology + +
+
Selfadjoint
+
For a real matrix, selfadjoint is a synonym for symmetric. For a complex matrix, selfadjoint is a synonym for \em hermitian. + More generally, a matrix \f$ A \f$ is selfadjoint if and only if it is equal to its adjoint \f$ A^* \f$. The adjoint is also called the \em conjugate \em transpose.
+
Positive/negative definite
+
A selfadjoint matrix \f$ A \f$ is positive definite if \f$ v^* A v > 0 \f$ for any nonzero vector \f$ v \f$. + In the same vein, it is negative definite if \f$ v^* A v < 0 \f$ for any nonzero vector \f$ v \f$.
+
Positive/negative semidefinite
+
A selfadjoint matrix \f$ A \f$ is positive semi-definite if \f$ v^* A v \ge 0 \f$ for any nonzero vector \f$ v \f$. + In the same vein, it is negative semi-definite if \f$ v^* A v \le 0 \f$ for any nonzero vector \f$ v \f$.
+ +
Blocking
+
Means the algorithm can work per block, thereby guaranteeing good scaling of the performance for large matrices.
+
Implicit Multi Threading (MT)
+
Means the algorithm can take advantage of multicore processors via OpenMP. "Implicit" means the algorithm itself is not parallelized, but that it relies on parallelized matrix-matrix product routines.
+
Explicit Multi Threading (MT)
+
Means the algorithm is explicitly parallelized to take advantage of multicore processors via OpenMP.
+
Meta-unroller
+
Means the algorithm is automatically and explicitly unrolled for very small fixed size matrices.
+
+
+
+ + +*/ + +} diff --git a/thirdparty/eigen/doc/TopicMultithreading.dox b/thirdparty/eigen/doc/TopicMultithreading.dox new file mode 100644 index 000000000..47c9b261f --- /dev/null +++ b/thirdparty/eigen/doc/TopicMultithreading.dox @@ -0,0 +1,54 @@ +namespace Eigen { + +/** \page TopicMultiThreading Eigen and multi-threading + +\section TopicMultiThreading_MakingEigenMT Make Eigen run in parallel + +Some of Eigen's algorithms can exploit the multiple cores present in your hardware. To this end, it is enough to enable OpenMP on your compiler, for instance: + * GCC: \c -fopenmp + * ICC: \c -openmp + * MSVC: check the respective option in the build properties. +You can control the number of threads that will be used using either the OpenMP API or Eigen's API using the following priority: +\code + OMP_NUM_THREADS=n ./my_program + omp_set_num_threads(n); + Eigen::setNbThreads(n); +\endcode +Unless setNbThreads has been called, Eigen uses the number of threads specified by OpenMP. You can restore this behavior by calling \code setNbThreads(0); \endcode +You can query the number of threads that will be used with: +\code +n = Eigen::nbThreads( ); +\endcode +You can disable Eigen's multi-threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token. + +Currently, the following algorithms can make use of multi-threading: + - general dense matrix - matrix products + - PartialPivLU + - row-major-sparse * dense vector/matrix products + - ConjugateGradient with \c Lower|Upper as the \c UpLo template parameter. + - BiCGSTAB with a row-major sparse matrix format. 
+ - LeastSquaresConjugateGradient + +\section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application + +In the case your own application is multithreaded, and multiple threads make calls to Eigen, then you have to initialize Eigen by calling the following routine \b before creating the threads: +\code +#include <Eigen/Core> + +int main(int argc, char** argv) +{ + Eigen::initParallel(); + + ... +} +\endcode + +\note With Eigen 3.3, and a fully C++11 compliant compiler (i.e., thread-safe static local variable initialization), then calling \c initParallel() is optional. + +\warning Note that all functions generating random matrices are \b not re-entrant nor thread-safe. Those include DenseBase::Random(), and DenseBase::setRandom() despite a call to Eigen::initParallel(). This is because these functions are based on std::rand which is not re-entrant. For a thread-safe random generator, we recommend the use of boost::random or the C++11 random facilities. + +In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallelization as detailed in the previous section. + +*/ + +} diff --git a/thirdparty/eigen/doc/TopicResizing.dox b/thirdparty/eigen/doc/TopicResizing.dox new file mode 100644 index 000000000..c323e17ad --- /dev/null +++ b/thirdparty/eigen/doc/TopicResizing.dox @@ -0,0 +1,11 @@ +namespace Eigen { + +/** \page TopicResizing Resizing + + +TODO: write this dox page! + +Is linked from the tutorial on the Matrix class. + +*/ +} diff --git a/thirdparty/eigen/doc/TopicScalarTypes.dox b/thirdparty/eigen/doc/TopicScalarTypes.dox new file mode 100644 index 000000000..2ff03c198 --- /dev/null +++ b/thirdparty/eigen/doc/TopicScalarTypes.dox @@ -0,0 +1,12 @@ +namespace Eigen { + +/** \page TopicScalarTypes Scalar types + + +TODO: write this dox page! + +Is linked from the tutorial on the Matrix class. 
+ +*/ + +} diff --git a/thirdparty/eigen/doc/TopicVectorization.dox b/thirdparty/eigen/doc/TopicVectorization.dox new file mode 100644 index 000000000..274d0451b --- /dev/null +++ b/thirdparty/eigen/doc/TopicVectorization.dox @@ -0,0 +1,9 @@ +namespace Eigen { + +/** \page TopicVectorization Vectorization + + +TODO: write this dox page! + +*/ +} diff --git a/thirdparty/eigen/doc/TutorialAdvancedInitialization.dox b/thirdparty/eigen/doc/TutorialAdvancedInitialization.dox new file mode 100644 index 000000000..50374d0d0 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialAdvancedInitialization.dox @@ -0,0 +1,162 @@ +namespace Eigen { + +/** \eigenManualPage TutorialAdvancedInitialization Advanced initialization + +This page discusses several advanced methods for initializing matrices. It gives more details on the +comma-initializer, which was introduced before. It also explains how to get special matrices such as the +identity matrix and the zero matrix. + +\eigenAutoToc + +\section TutorialAdvancedInitializationCommaInitializer The comma initializer + +Eigen offers a comma initializer syntax which allows the user to easily set all the coefficients of a matrix, +vector or array. Simply list the coefficients, starting at the top-left corner and moving from left to right +and from the top to the bottom. The size of the object needs to be specified beforehand. If you list too few +or too many coefficients, Eigen will complain. + + + + +
Example:Output:
+\include Tutorial_commainit_01.cpp + +\verbinclude Tutorial_commainit_01.out +
+ +Moreover, the elements of the initialization list may themselves be vectors or matrices. A common use is +to join vectors or matrices together. For example, here is how to join two row vectors together. Remember +that you have to set the size before you can use the comma initializer. + + + + +
Example:Output:
+\include Tutorial_AdvancedInitialization_Join.cpp + +\verbinclude Tutorial_AdvancedInitialization_Join.out +
+ +We can use the same technique to initialize matrices with a block structure. + + + + +
Example:Output:
+\include Tutorial_AdvancedInitialization_Block.cpp + +\verbinclude Tutorial_AdvancedInitialization_Block.out +
+ +The comma initializer can also be used to fill block expressions such as m.row(i). Here is a more +complicated way to get the same result as in the first example above: + + + + +
Example:Output:
+\include Tutorial_commainit_01b.cpp + +\verbinclude Tutorial_commainit_01b.out +
+ + +\section TutorialAdvancedInitializationSpecialMatrices Special matrices and arrays + +The Matrix and Array classes have static methods like \link DenseBase::Zero() Zero()\endlink, which can be +used to initialize all coefficients to zero. There are three variants. The first variant takes no arguments +and can only be used for fixed-size objects. If you want to initialize a dynamic-size object to zero, you need +to specify the size. Thus, the second variant requires one argument and can be used for one-dimensional +dynamic-size objects, while the third variant requires two arguments and can be used for two-dimensional +objects. All three variants are illustrated in the following example: + + + + +
Example:Output:
+\include Tutorial_AdvancedInitialization_Zero.cpp + +\verbinclude Tutorial_AdvancedInitialization_Zero.out +
+ +Similarly, the static method \link DenseBase::Constant() Constant\endlink(value) sets all coefficients to \c value. +If the size of the object needs to be specified, the additional arguments go before the \c value +argument, as in MatrixXd::Constant(rows, cols, value). The method \link DenseBase::Random() Random() +\endlink fills the matrix or array with random coefficients. The identity matrix can be obtained by calling +\link MatrixBase::Identity() Identity()\endlink; this method is only available for Matrix, not for Array, +because "identity matrix" is a linear algebra concept. The method +\link DenseBase::LinSpaced LinSpaced\endlink(size, low, high) is only available for vectors and +one-dimensional arrays; it yields a vector of the specified size whose coefficients are equally spaced between +\c low and \c high. The method \c LinSpaced() is illustrated in the following example, which prints a table +with angles in degrees, the corresponding angle in radians, and their sine and cosine. + + + + +
Example:Output:
+\include Tutorial_AdvancedInitialization_LinSpaced.cpp + +\verbinclude Tutorial_AdvancedInitialization_LinSpaced.out +
+ +This example shows that objects like the ones returned by LinSpaced() can be assigned to variables (and +expressions). Eigen defines utility functions like \link DenseBase::setZero() setZero()\endlink, +\link MatrixBase::setIdentity() \endlink and \link DenseBase::setLinSpaced() \endlink to do this +conveniently. The following example contrasts three ways to construct the matrix +\f$ J = \bigl[ \begin{smallmatrix} O & I \\ I & O \end{smallmatrix} \bigr] \f$: using static methods and +assignment, using static methods and the comma-initializer, or using the setXxx() methods. + + + + +
Example:Output:
+\include Tutorial_AdvancedInitialization_ThreeWays.cpp + +\verbinclude Tutorial_AdvancedInitialization_ThreeWays.out +
+ +A summary of all pre-defined matrix, vector and array objects can be found in the \ref QuickRefPage. + + +\section TutorialAdvancedInitializationTemporaryObjects Usage as temporary objects + +As shown above, static methods as Zero() and Constant() can be used to initialize variables at the time of +declaration or at the right-hand side of an assignment operator. You can think of these methods as returning a +matrix or array; in fact, they return so-called \ref TopicEigenExpressionTemplates "expression objects" which +evaluate to a matrix or array when needed, so that this syntax does not incur any overhead. + +These expressions can also be used as a temporary object. The second example in +the \ref GettingStarted guide, which we reproduce here, already illustrates this. + + + + +
Example:Output:
+\include QuickStart_example2_dynamic.cpp + +\verbinclude QuickStart_example2_dynamic.out +
+ +The expression m + MatrixXf::Constant(3,3,1.2) constructs the 3-by-3 matrix expression with all its coefficients +equal to 1.2 plus the corresponding coefficient of \a m. + +The comma-initializer, too, can also be used to construct temporary objects. The following example constructs a random +matrix of size 2-by-3, and then multiplies this matrix on the left with +\f$ \bigl[ \begin{smallmatrix} 0 & 1 \\ 1 & 0 \end{smallmatrix} \bigr] \f$. + + + + +
Example:Output:
+\include Tutorial_AdvancedInitialization_CommaTemporary.cpp + +\verbinclude Tutorial_AdvancedInitialization_CommaTemporary.out +
+ +The \link CommaInitializer::finished() finished() \endlink method is necessary here to get the actual matrix +object once the comma initialization of our temporary submatrix is done. + + +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialArrayClass.dox b/thirdparty/eigen/doc/TutorialArrayClass.dox new file mode 100644 index 000000000..f6f351091 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialArrayClass.dox @@ -0,0 +1,192 @@ +namespace Eigen { + +/** \eigenManualPage TutorialArrayClass The Array class and coefficient-wise operations + +This page aims to provide an overview and explanations on how to use +Eigen's Array class. + +\eigenAutoToc + +\section TutorialArrayClassIntro What is the Array class? + +The Array class provides general-purpose arrays, as opposed to the Matrix class which +is intended for linear algebra. Furthermore, the Array class provides an easy way to +perform coefficient-wise operations, which might not have a linear algebraic meaning, +such as adding a constant to every coefficient in the array or multiplying two arrays coefficient-wise. + + +\section TutorialArrayClassTypes Array types +Array is a class template taking the same template parameters as Matrix. +As with Matrix, the first three template parameters are mandatory: +\code +Array +\endcode +The last three template parameters are optional. Since this is exactly the same as for Matrix, +we won't explain it again here and just refer to \ref TutorialMatrixClass. + +Eigen also provides typedefs for some common cases, in a way that is similar to the Matrix typedefs +but with some slight differences, as the word "array" is used for both 1-dimensional and 2-dimensional arrays. +We adopt the convention that typedefs of the form ArrayNt stand for 1-dimensional arrays, where N and t are +the size and the scalar type, as in the Matrix typedefs explained on \ref TutorialMatrixClass "this page". For 2-dimensional arrays, we +use typedefs of the form ArrayNNt. 
Some examples are shown in the following table: + + + + + + + + + + + + + + + + + + + + + + +
Type Typedef
\code Array<float,Dynamic,1> \endcode \code ArrayXf \endcode
\code Array<float,3,1> \endcode \code Array3f \endcode
\code Array<double,Dynamic,Dynamic> \endcode \code ArrayXXd \endcode
\code Array<double,3,3> \endcode \code Array33d \endcode
+ + +\section TutorialArrayClassAccess Accessing values inside an Array + +The parenthesis operator is overloaded to provide write and read access to the coefficients of an array, just as with matrices. +Furthermore, the \c << operator can be used to initialize arrays (via the comma initializer) or to print them. + + + + +
Example:Output:
+\include Tutorial_ArrayClass_accessors.cpp + +\verbinclude Tutorial_ArrayClass_accessors.out +
+ +For more information about the comma initializer, see \ref TutorialAdvancedInitialization. + + +\section TutorialArrayClassAddSub Addition and subtraction + +Adding and subtracting two arrays is the same as for matrices. +The operation is valid if both arrays have the same size, and the addition or subtraction is done coefficient-wise. + +Arrays also support expressions of the form array + scalar which add a scalar to each coefficient in the array. +This provides a functionality that is not directly available for Matrix objects. + + + + +
Example:Output:
+\include Tutorial_ArrayClass_addition.cpp + +\verbinclude Tutorial_ArrayClass_addition.out +
+ + +\section TutorialArrayClassMult Array multiplication + +First of all, of course you can multiply an array by a scalar, this works in the same way as matrices. Where arrays +are fundamentally different from matrices, is when you multiply two together. Matrices interpret +multiplication as matrix product and arrays interpret multiplication as coefficient-wise product. Thus, two +arrays can be multiplied if and only if they have the same dimensions. + + + + +
Example:Output:
+\include Tutorial_ArrayClass_mult.cpp + +\verbinclude Tutorial_ArrayClass_mult.out +
+ + +\section TutorialArrayClassCwiseOther Other coefficient-wise operations + +The Array class defines other coefficient-wise operations besides the addition, subtraction and multiplication +operators described above. For example, the \link ArrayBase::abs() .abs() \endlink method takes the absolute +value of each coefficient, while \link ArrayBase::sqrt() .sqrt() \endlink computes the square root of the +coefficients. If you have two arrays of the same size, you can call \link ArrayBase::min(const Eigen::ArrayBase&) const .min(.) \endlink to +construct the array whose coefficients are the minimum of the corresponding coefficients of the two given +arrays. These operations are illustrated in the following example. + + + + +
Example:Output:
+\include Tutorial_ArrayClass_cwise_other.cpp + +\verbinclude Tutorial_ArrayClass_cwise_other.out +
+ +More coefficient-wise operations can be found in the \ref QuickRefPage. + + +\section TutorialArrayClassConvert Converting between array and matrix expressions + +When should you use objects of the Matrix class and when should you use objects of the Array class? You cannot +apply Matrix operations on arrays, or Array operations on matrices. Thus, if you need to do linear algebraic +operations such as matrix multiplication, then you should use matrices; if you need to do coefficient-wise +operations, then you should use arrays. However, sometimes it is not that simple, but you need to use both +Matrix and Array operations. In that case, you need to convert a matrix to an array or reversely. This gives +access to all operations regardless of the choice of declaring objects as arrays or as matrices. + +\link MatrixBase Matrix expressions \endlink have an \link MatrixBase::array() .array() \endlink method that +'converts' them into \link ArrayBase array expressions\endlink, so that coefficient-wise operations +can be applied easily. Conversely, \link ArrayBase array expressions \endlink +have a \link ArrayBase::matrix() .matrix() \endlink method. As with all Eigen expression abstractions, +this doesn't have any runtime cost (provided that you let your compiler optimize). +Both \link MatrixBase::array() .array() \endlink and \link ArrayBase::matrix() .matrix() \endlink +can be used as rvalues and as lvalues. + +Mixing matrices and arrays in an expression is forbidden with Eigen. For instance, you cannot add a matrix and +array directly; the operands of a \c + operator should either both be matrices or both be arrays. However, +it is easy to convert from one to the other with \link MatrixBase::array() .array() \endlink and +\link ArrayBase::matrix() .matrix()\endlink. The exception to this rule is the assignment operator: it is +allowed to assign a matrix expression to an array variable, or to assign an array expression to a matrix +variable. 
+ +The following example shows how to use array operations on a Matrix object by employing the +\link MatrixBase::array() .array() \endlink method. For example, the statement +result = m.array() * n.array() takes two matrices \c m and \c n, converts them both to an array, uses +* to multiply them coefficient-wise and assigns the result to the matrix variable \c result (this is legal +because Eigen allows assigning array expressions to matrix variables). + +As a matter of fact, this usage case is so common that Eigen provides a \link MatrixBase::cwiseProduct const +.cwiseProduct(.) \endlink method for matrices to compute the coefficient-wise product. This is also shown in +the example program. + + + + +
Example:Output:
+\include Tutorial_ArrayClass_interop_matrix.cpp + +\verbinclude Tutorial_ArrayClass_interop_matrix.out +
+ +Similarly, if \c array1 and \c array2 are arrays, then the expression array1.matrix() * array2.matrix() +computes their matrix product. + +Here is a more advanced example. The expression (m.array() + 4).matrix() * m adds 4 to every +coefficient in the matrix \c m and then computes the matrix product of the result with \c m. Similarly, the +expression (m.array() * n.array()).matrix() * m computes the coefficient-wise product of the matrices +\c m and \c n and then the matrix product of the result with \c m. + + + + +
Example:Output:
+\include Tutorial_ArrayClass_interop.cpp + +\verbinclude Tutorial_ArrayClass_interop.out +
+ +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialBlockOperations.dox b/thirdparty/eigen/doc/TutorialBlockOperations.dox new file mode 100644 index 000000000..a2d8c97cc --- /dev/null +++ b/thirdparty/eigen/doc/TutorialBlockOperations.dox @@ -0,0 +1,228 @@ +namespace Eigen { + +/** \eigenManualPage TutorialBlockOperations Block operations + +This page explains the essentials of block operations. +A block is a rectangular part of a matrix or array. Blocks expressions can be used both +as rvalues and as lvalues. As usual with Eigen expressions, this abstraction has zero runtime cost +provided that you let your compiler optimize. + +\eigenAutoToc + +\section TutorialBlockOperationsUsing Using block operations + +The most general block operation in Eigen is called \link DenseBase::block() .block() \endlink. +There are two versions, whose syntax is as follows: + + + + + + + + +
\b %Block \b operation +Version constructing a \n dynamic-size block expressionVersion constructing a \n fixed-size block expression
%Block of size (p,q), starting at (i,j)\code +matrix.block(i,j,p,q);\endcode \code +matrix.block<p,q>(i,j);\endcode
+ +As always in Eigen, indices start at 0. + +Both versions can be used on fixed-size and dynamic-size matrices and arrays. +These two expressions are semantically equivalent. +The only difference is that the fixed-size version will typically give you faster code if the block size is small, +but requires this size to be known at compile time. + +The following program uses the dynamic-size and fixed-size versions to print the values of several blocks inside a +matrix. + + + + +
Example:Output:
+\include Tutorial_BlockOperations_print_block.cpp + +\verbinclude Tutorial_BlockOperations_print_block.out +
+ +In the above example the \link DenseBase::block() .block() \endlink function was employed as a \em rvalue, i.e. +it was only read from. However, blocks can also be used as \em lvalues, meaning that you can assign to a block. + +This is illustrated in the following example. This example also demonstrates blocks in arrays, which works exactly like the above-demonstrated blocks in matrices. + + + + +
Example:Output:
+\include Tutorial_BlockOperations_block_assignment.cpp + +\verbinclude Tutorial_BlockOperations_block_assignment.out +
+ +While the \link DenseBase::block() .block() \endlink method can be used for any block operation, there are +other methods for special cases, providing more specialized API and/or better performance. On the topic of performance, all what +matters is that you give Eigen as much information as possible at compile time. For example, if your block is a single whole column in a matrix, +using the specialized \link DenseBase::col() .col() \endlink function described below lets Eigen know that, which can give it optimization opportunities. + +The rest of this page describes these specialized methods. + +\section TutorialBlockOperationsSyntaxColumnRows Columns and rows + +Individual columns and rows are special cases of blocks. Eigen provides methods to easily address them: +\link DenseBase::col() .col() \endlink and \link DenseBase::row() .row()\endlink. + + + + + + + + + + +
%Block operationMethod
ith row + \link DenseBase::row() * \endlink\code +matrix.row(i);\endcode
jth column + \link DenseBase::col() * \endlink\code +matrix.col(j);\endcode
+ +The argument for \p col() and \p row() is the index of the column or row to be accessed. As always in Eigen, indices start at 0. + + + + +
Example:Output:
+\include Tutorial_BlockOperations_colrow.cpp + +\verbinclude Tutorial_BlockOperations_colrow.out +
+ +That example also demonstrates that block expressions (here columns) can be used in arithmetic like any other expression. + + +\section TutorialBlockOperationsSyntaxCorners Corner-related operations + +Eigen also provides special methods for blocks that are flushed against one of the corners or sides of a +matrix or array. For instance, \link DenseBase::topLeftCorner() .topLeftCorner() \endlink can be used to refer +to a block in the top-left corner of a matrix. + +The different possibilities are summarized in the following table: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
%Block \b operation +Version constructing a \n dynamic-size block expressionVersion constructing a \n fixed-size block expression
Top-left p by q block \link DenseBase::topLeftCorner() * \endlink\code +matrix.topLeftCorner(p,q);\endcode \code +matrix.topLeftCorner<p,q>();\endcode
Bottom-left p by q block + \link DenseBase::bottomLeftCorner() * \endlink\code +matrix.bottomLeftCorner(p,q);\endcode \code +matrix.bottomLeftCorner<p,q>();\endcode
Top-right p by q block + \link DenseBase::topRightCorner() * \endlink\code +matrix.topRightCorner(p,q);\endcode \code +matrix.topRightCorner<p,q>();\endcode
Bottom-right p by q block + \link DenseBase::bottomRightCorner() * \endlink\code +matrix.bottomRightCorner(p,q);\endcode \code +matrix.bottomRightCorner<p,q>();\endcode
%Block containing the first q rows + \link DenseBase::topRows() * \endlink\code +matrix.topRows(q);\endcode \code +matrix.topRows<q>();\endcode
%Block containing the last q rows + \link DenseBase::bottomRows() * \endlink\code +matrix.bottomRows(q);\endcode \code +matrix.bottomRows<q>();\endcode
%Block containing the first p columns + \link DenseBase::leftCols() * \endlink\code +matrix.leftCols(p);\endcode \code +matrix.leftCols<p>();\endcode
%Block containing the last q columns + \link DenseBase::rightCols() * \endlink\code +matrix.rightCols(q);\endcode \code +matrix.rightCols<q>();\endcode
+ +Here is a simple example illustrating the use of the operations presented above: + + + + +
Example:Output:
+\include Tutorial_BlockOperations_corner.cpp + +\verbinclude Tutorial_BlockOperations_corner.out +
+ + +\section TutorialBlockOperationsSyntaxVectors Block operations for vectors + +Eigen also provides a set of block operations designed specifically for the special case of vectors and one-dimensional arrays: + + + + + + + + + + + + + + + + + +
%Block operationVersion constructing a \n dynamic-size block expressionVersion constructing a \n fixed-size block expression
%Block containing the first \p n elements + \link DenseBase::head() * \endlink\code +vector.head(n);\endcode \code +vector.head<n>();\endcode
%Block containing the last \p n elements + \link DenseBase::tail() * \endlink\code +vector.tail(n);\endcode \code +vector.tail<n>();\endcode
%Block containing \p n elements, starting at position \p i + \link DenseBase::segment() * \endlink\code +vector.segment(i,n);\endcode \code +vector.segment<n>(i);\endcode
+ + +An example is presented below: + + + +
Example:Output:
+\include Tutorial_BlockOperations_vector.cpp + +\verbinclude Tutorial_BlockOperations_vector.out +
+ +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialGeometry.dox b/thirdparty/eigen/doc/TutorialGeometry.dox new file mode 100644 index 000000000..2e1420f98 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialGeometry.dox @@ -0,0 +1,242 @@ +namespace Eigen { + +/** \eigenManualPage TutorialGeometry Space transformations + +In this page, we will introduce the many possibilities offered by the \ref Geometry_Module "geometry module" to deal with 2D and 3D rotations and projective or affine transformations. + +\eigenAutoToc + +Eigen's Geometry module provides two different kinds of geometric transformations: + - Abstract transformations, such as rotations (represented by \ref AngleAxis "angle and axis" or by a \ref Quaternion "quaternion"), \ref Translation "translations", \ref Scaling "scalings". These transformations are NOT represented as matrices, but you can nevertheless mix them with matrices and vectors in expressions, and convert them to matrices if you wish. + - Projective or affine transformation matrices: see the Transform class. These are really matrices. + +\note If you are working with OpenGL 4x4 matrices then Affine3f and Affine3d are what you want. Since Eigen defaults to column-major storage, you can directly use the Transform::data() method to pass your transformation matrix to OpenGL. + +You can construct a Transform from an abstract transformation, like this: +\code + Transform t(AngleAxis(angle,axis)); +\endcode +or like this: +\code + Transform t; + t = AngleAxis(angle,axis); +\endcode +But note that unfortunately, because of how C++ works, you can \b not do this: +\code + Transform t = AngleAxis(angle,axis); +\endcode +\b Explanation: In the C++ language, this would require Transform to have a non-explicit conversion constructor from AngleAxis, but we really don't want to allow implicit casting here. + + +\section TutorialGeoElementaryTransformations Transformation types + + + + + + + + + + +
Transformation typeTypical initialization code
+\ref Rotation2D "2D rotation" from an angle\code +Rotation2D<float> rot2(angle_in_radian);\endcode
+3D rotation as an \ref AngleAxis "angle + axis"\code +AngleAxis<float> aa(angle_in_radian, Vector3f(ax,ay,az));\endcode +The axis vector must be normalized.
+3D rotation as a \ref Quaternion "quaternion"\code +Quaternion<float> q; q = AngleAxis<float>(angle_in_radian, axis);\endcode
+N-D Scaling\code +Scaling(sx, sy) +Scaling(sx, sy, sz) +Scaling(s) +Scaling(vecN)\endcode
+N-D Translation\code +Translation<float,2>(tx, ty) +Translation<float,3>(tx, ty, tz) +Translation<float,N>(s) +Translation<float,N>(vecN)\endcode
+N-D \ref TutorialGeoTransform "Affine transformation"\code +Transform<float,N,Affine> t = concatenation_of_any_transformations; +Transform<float,3,Affine> t = Translation3f(p) * AngleAxisf(a,axis) * Scaling(s);\endcode
+N-D Linear transformations \n +(pure rotations, \n scaling, etc.)\code +Matrix<float,N> t = concatenation_of_rotations_and_scalings; +Matrix<float,2> t = Rotation2Df(a) * Scaling(s); +Matrix<float,3> t = AngleAxisf(a,axis) * Scaling(s);\endcode
+ +Notes on rotations\n To transform more than a single vector the preferred +representations are rotation matrices, while for other usages Quaternion is the +representation of choice as they are compact, fast and stable. Finally Rotation2D and +AngleAxis are mainly convenient types to create other rotation objects. + +Notes on Translation and Scaling\n Like AngleAxis, these classes were +designed to simplify the creation/initialization of linear (Matrix) and affine (Transform) +transformations. Nevertheless, unlike AngleAxis which is inefficient to use, these classes +might still be interesting to write generic and efficient algorithms taking as input any +kind of transformations. + +Any of the above transformation types can be converted to any other types of the same nature, +or to a more generic type. Here are some additional examples: + + +
\code +Rotation2Df r; r = Matrix2f(..); // assumes a pure rotation matrix +AngleAxisf aa; aa = Quaternionf(..); +AngleAxisf aa; aa = Matrix3f(..); // assumes a pure rotation matrix +Matrix2f m; m = Rotation2Df(..); +Matrix3f m; m = Quaternionf(..); Matrix3f m; m = Scaling(..); +Affine3f m; m = AngleAxisf(..); Affine3f m; m = Scaling(..); +Affine3f m; m = Translation3f(..); Affine3f m; m = Matrix3f(..); +\endcode
+ + +top\section TutorialGeoCommontransformationAPI Common API across transformation types + +To some extent, Eigen's \ref Geometry_Module "geometry module" allows you to write +generic algorithms working on any kind of transformation representations: + + + + + +
+Concatenation of two transformations\code +gen1 * gen2;\endcode
Apply the transformation to a vector\code +vec2 = gen1 * vec1;\endcode
Get the inverse of the transformation\code +gen2 = gen1.inverse();\endcode
Spherical interpolation \n (Rotation2D and Quaternion only)\code +rot3 = rot1.slerp(alpha,rot2);\endcode
+ + + +top\section TutorialGeoTransform Affine transformations +Generic affine transformations are represented by the Transform class which internally +is a (Dim+1)^2 matrix. In Eigen we have chosen to not distinguish between points and +vectors such that all points are actually represented by displacement vectors from the +origin ( \f$ \mathbf{p} \equiv \mathbf{p}-0 \f$ ). With that in mind, real points and +vectors are distinguished when the transformation is applied. + + + + + + + + +
+Apply the transformation to a \b point \code +VectorNf p1, p2; +p2 = t * p1;\endcode
+Apply the transformation to a \b vector \code +VectorNf vec1, vec2; +vec2 = t.linear() * vec1;\endcode
+Apply a \em general transformation \n to a \b normal \b vector \n +\code +VectorNf n1, n2; +MatrixNf normalMatrix = t.linear().inverse().transpose(); +n2 = (normalMatrix * n1).normalized();\endcode
(See subject 5.27 of this faq for the explanations)
+Apply a transformation with \em pure \em rotation \n to a \b normal \b vector +(no scaling, no shear)\code +n2 = t.linear() * n1;\endcode
+OpenGL compatibility \b 3D \code +glLoadMatrixf(t.data());\endcode
+OpenGL compatibility \b 2D \code +Affine3f aux(Affine3f::Identity()); +aux.linear().topLeftCorner<2,2>() = t.linear(); +aux.translation().start<2>() = t.translation(); +glLoadMatrixf(aux.data());\endcode
+ +\b Component \b accessors + + + + + + +
+full read-write access to the internal matrix\code +t.matrix() = matN1xN1; // N1 means N+1 +matN1xN1 = t.matrix(); +\endcode
+coefficient accessors\code +t(i,j) = scalar; <=> t.matrix()(i,j) = scalar; +scalar = t(i,j); <=> scalar = t.matrix()(i,j); +\endcode
+translation part\code +t.translation() = vecN; +vecN = t.translation(); +\endcode
+linear part\code +t.linear() = matNxN; +matNxN = t.linear(); +\endcode
+extract the rotation matrix\code +matNxN = t.rotation(); +\endcode
+ + +\b Transformation \b creation \n +While transformation objects can be created and updated concatenating elementary transformations, +the Transform class also features a procedural API: + + + + + + +
procedural APIequivalent natural API
Translation\code +t.translate(Vector_(tx,ty,..)); +t.pretranslate(Vector_(tx,ty,..)); +\endcode\code +t *= Translation_(tx,ty,..); +t = Translation_(tx,ty,..) * t; +\endcode
\b Rotation \n In 2D and for the procedural API, any_rotation can also \n be an angle in radian\code +t.rotate(any_rotation); +t.prerotate(any_rotation); +\endcode\code +t *= any_rotation; +t = any_rotation * t; +\endcode
Scaling\code +t.scale(Vector_(sx,sy,..)); +t.scale(s); +t.prescale(Vector_(sx,sy,..)); +t.prescale(s); +\endcode\code +t *= Scaling(sx,sy,..); +t *= Scaling(s); +t = Scaling(sx,sy,..) * t; +t = Scaling(s) * t; +\endcode
Shear transformation \n ( \b 2D \b only ! )\code +t.shear(sx,sy); +t.preshear(sx,sy); +\endcode
+ +Note that in both APIs, any number of transformations can be concatenated in a single expression as shown in the two following equivalent examples: + + + +
\code +t.pretranslate(..).rotate(..).translate(..).scale(..); +\endcode
\code +t = Translation_(..) * t * RotationType(..) * Translation_(..) * Scaling(..); +\endcode
+ + + +top\section TutorialGeoEulerAngles Euler angles + + +
+Euler angles might be convenient to create rotation objects. +On the other hand, since there exist 24 different conventions, they are pretty confusing to use. This example shows how +to create a rotation matrix according to the 2-1-2 convention.\code +Matrix3f m; +m = AngleAxisf(angle1, Vector3f::UnitZ()) + * AngleAxisf(angle2, Vector3f::UnitY()) + * AngleAxisf(angle3, Vector3f::UnitZ()); +\endcode
+ +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialLinearAlgebra.dox b/thirdparty/eigen/doc/TutorialLinearAlgebra.dox new file mode 100644 index 000000000..cb92ceeae --- /dev/null +++ b/thirdparty/eigen/doc/TutorialLinearAlgebra.dox @@ -0,0 +1,272 @@ +namespace Eigen { + +/** \eigenManualPage TutorialLinearAlgebra Linear algebra and decompositions + +This page explains how to solve linear systems, compute various decompositions such as LU, +QR, %SVD, eigendecompositions... After reading this page, don't miss our +\link TopicLinearAlgebraDecompositions catalogue \endlink of dense matrix decompositions. + +\eigenAutoToc + +\section TutorialLinAlgBasicSolve Basic linear solving + +\b The \b problem: You have a system of equations, that you have written as a single matrix equation + \f[ Ax \: = \: b \f] +Where \a A and \a b are matrices (\a b could be a vector, as a special case). You want to find a solution \a x. + +\b The \b solution: You can choose between various decompositions, depending on what your matrix \a A looks like, +and depending on whether you favor speed or accuracy. However, let's start with an example that works in all cases, +and is a good compromise: + + + + + + +
Example:Output:
\include TutorialLinAlgExSolveColPivHouseholderQR.cpp \verbinclude TutorialLinAlgExSolveColPivHouseholderQR.out
+ +In this example, the colPivHouseholderQr() method returns an object of class ColPivHouseholderQR. Since here the +matrix is of type Matrix3f, this line could have been replaced by: +\code +ColPivHouseholderQR dec(A); +Vector3f x = dec.solve(b); +\endcode + +Here, ColPivHouseholderQR is a QR decomposition with column pivoting. It's a good compromise for this tutorial, as it +works for all matrices while being quite fast. Here is a table of some other decompositions that you can choose from, +depending on your matrix and the trade-off you want to make: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DecompositionMethodRequirements
on the matrix
Speed
(small-to-medium)
Speed
(large)
Accuracy
PartialPivLUpartialPivLu()Invertible+++++
FullPivLUfullPivLu()None-- -+++
HouseholderQRhouseholderQr()None+++++
ColPivHouseholderQRcolPivHouseholderQr()None++-+++
FullPivHouseholderQRfullPivHouseholderQr()None-- -+++
LLTllt()Positive definite+++++++
LDLTldlt()Positive or negative
semidefinite
++++++
JacobiSVDjacobiSvd()None- -- - -+++
+ +All of these decompositions offer a solve() method that works as in the above example. + +For example, if your matrix is positive definite, the above table says that a very good +choice is then the LLT or LDLT decomposition. Here's an example, also demonstrating that using a general +matrix (not a vector) as right hand side is possible. + + + + + + + +
Example:Output:
\include TutorialLinAlgExSolveLDLT.cpp \verbinclude TutorialLinAlgExSolveLDLT.out
+ +For a \ref TopicLinearAlgebraDecompositions "much more complete table" comparing all decompositions supported by Eigen (notice that Eigen +supports many other decompositions), see our special page on +\ref TopicLinearAlgebraDecompositions "this topic". + +\section TutorialLinAlgSolutionExists Checking if a solution really exists + +Only you know what error margin you want to allow for a solution to be considered valid. +So Eigen lets you do this computation for yourself, if you want to, as in this example: + + + + + + + +
Example:Output:
\include TutorialLinAlgExComputeSolveError.cpp \verbinclude TutorialLinAlgExComputeSolveError.out
+ +\section TutorialLinAlgEigensolving Computing eigenvalues and eigenvectors + +You need an eigendecomposition here, see available such decompositions on \ref TopicLinearAlgebraDecompositions "this page". +Make sure to check if your matrix is self-adjoint, as is often the case in these problems. Here's an example using +SelfAdjointEigenSolver, it could easily be adapted to general matrices using EigenSolver or ComplexEigenSolver. + +The computation of eigenvalues and eigenvectors does not necessarily converge, but such failure to converge is +very rare. The call to info() is to check for this possibility. + + + + + + + +
Example:Output:
\include TutorialLinAlgSelfAdjointEigenSolver.cpp \verbinclude TutorialLinAlgSelfAdjointEigenSolver.out
+ +\section TutorialLinAlgInverse Computing inverse and determinant + +First of all, make sure that you really want this. While inverse and determinant are fundamental mathematical concepts, +in \em numerical linear algebra they are not as popular as in pure mathematics. Inverse computations are often +advantageously replaced by solve() operations, and the determinant is often \em not a good way of checking if a matrix +is invertible. + +However, for \em very \em small matrices, the above is not true, and inverse and determinant can be very useful. + +While certain decompositions, such as PartialPivLU and FullPivLU, offer inverse() and determinant() methods, you can also +call inverse() and determinant() directly on a matrix. If your matrix is of a very small fixed size (at most 4x4) this +allows Eigen to avoid performing a LU decomposition, and instead use formulas that are more efficient on such small matrices. + +Here is an example: + + + + + + +
Example:Output:
\include TutorialLinAlgInverseDeterminant.cpp \verbinclude TutorialLinAlgInverseDeterminant.out
+ +\section TutorialLinAlgLeastsquares Least squares solving + +The most accurate method to do least squares solving is with an SVD decomposition. Eigen provides one +as the JacobiSVD class, and its solve() is doing least-squares solving. + +Here is an example: + + + + + + +
Example:Output:
\include TutorialLinAlgSVDSolve.cpp \verbinclude TutorialLinAlgSVDSolve.out
+ +Other methods, potentially faster but less reliable, are to use a Cholesky decomposition of the +normal matrix or a QR decomposition. Our page on \link LeastSquares least squares solving \endlink +has more details. + + +\section TutorialLinAlgSeparateComputation Separating the computation from the construction + +In the above examples, the decomposition was computed at the same time that the decomposition object was constructed. +There are however situations where you might want to separate these two things, for example if you don't know, +at the time of the construction, the matrix that you will want to decompose; or if you want to reuse an existing +decomposition object. + +What makes this possible is that: +\li all decompositions have a default constructor, +\li all decompositions have a compute(matrix) method that does the computation, and that may be called again + on an already-computed decomposition, reinitializing it. + +For example: + + + + + + + +
Example:Output:
\include TutorialLinAlgComputeTwice.cpp \verbinclude TutorialLinAlgComputeTwice.out
+ +Finally, you can tell the decomposition constructor to preallocate storage for decomposing matrices of a given size, +so that when you subsequently decompose such matrices, no dynamic memory allocation is performed (of course, if you +are using fixed-size matrices, no dynamic memory allocation happens at all). This is done by just +passing the size to the decomposition constructor, as in this example: +\code +HouseholderQR qr(50,50); +MatrixXf A = MatrixXf::Random(50,50); +qr.compute(A); // no dynamic memory allocation +\endcode + +\section TutorialLinAlgRankRevealing Rank-revealing decompositions + +Certain decompositions are rank-revealing, i.e. are able to compute the rank of a matrix. These are typically +also the decompositions that behave best in the face of a non-full-rank matrix (which in the square case means a +singular matrix). On \ref TopicLinearAlgebraDecompositions "this table" you can see for all our decompositions +whether they are rank-revealing or not. + +Rank-revealing decompositions offer at least a rank() method. They can also offer convenience methods such as isInvertible(), +and some are also providing methods to compute the kernel (null-space) and image (column-space) of the matrix, as is the +case with FullPivLU: + + + + + + + +
Example:Output:
\include TutorialLinAlgRankRevealing.cpp \verbinclude TutorialLinAlgRankRevealing.out
+ +Of course, any rank computation depends on the choice of an arbitrary threshold, since practically no +floating-point matrix is \em exactly rank-deficient. Eigen picks a sensible default threshold, which depends +on the decomposition but is typically the diagonal size times machine epsilon. While this is the best default we +could pick, only you know what is the right threshold for your application. You can set this by calling setThreshold() +on your decomposition object before calling rank() or any other method that needs to use such a threshold. +The decomposition itself, i.e. the compute() method, is independent of the threshold. You don't need to recompute the +decomposition after you've changed the threshold. + + + + + + + +
Example:Output:
\include TutorialLinAlgSetThreshold.cpp \verbinclude TutorialLinAlgSetThreshold.out
+ +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialMapClass.dox b/thirdparty/eigen/doc/TutorialMapClass.dox new file mode 100644 index 000000000..f8fb0fd2f --- /dev/null +++ b/thirdparty/eigen/doc/TutorialMapClass.dox @@ -0,0 +1,86 @@ +namespace Eigen { + +/** \eigenManualPage TutorialMapClass Interfacing with raw buffers: the Map class + +This page explains how to work with "raw" C/C++ arrays. +This can be useful in a variety of contexts, particularly when "importing" vectors and matrices from other libraries into %Eigen. + +\eigenAutoToc + +\section TutorialMapIntroduction Introduction + +Occasionally you may have a pre-defined array of numbers that you want to use within %Eigen as a vector or matrix. While one option is to make a copy of the data, most commonly you probably want to re-use this memory as an %Eigen type. Fortunately, this is very easy with the Map class. + +\section TutorialMapTypes Map types and declaring Map variables + +A Map object has a type defined by its %Eigen equivalent: +\code +Map > +\endcode +Note that, in this default case, a Map requires just a single template parameter. + +To construct a Map variable, you need two other pieces of information: a pointer to the region of memory defining the array of coefficients, and the desired shape of the matrix or vector. For example, to define a matrix of \c float with sizes determined at run time, you might do the following: +\code +Map mf(pf,rows,columns); +\endcode +where \c pf is a \c float \c * pointing to the array of memory. A fixed-size read-only vector of integers might be declared as +\code +Map mi(pi); +\endcode +where \c pi is an \c int \c *. In this case the size does not have to be passed to the constructor, because it is already specified by the Matrix/Array type. + +Note that Map does not have a default constructor; you \em must pass a pointer to initialize the object. However, you can work around this requirement (see \ref TutorialMapPlacementNew). 
+ +Map is flexible enough to accommodate a variety of different data representations. There are two other (optional) template parameters: +\code +Map +\endcode +\li \c MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. The default is \c #Unaligned. +\li \c StrideType allows you to specify a custom layout for the memory array, using the Stride class. One example would be to specify that the data array is organized in row-major format: + + + + + +
Example:Output:
\include Tutorial_Map_rowmajor.cpp \verbinclude Tutorial_Map_rowmajor.out
+However, Stride is even more flexible than this; for details, see the documentation for the Map and Stride classes. + +\section TutorialMapUsing Using Map variables + +You can use a Map object just like any other %Eigen type: + + + + + +
Example:Output:
\include Tutorial_Map_using.cpp \verbinclude Tutorial_Map_using.out
+ +All %Eigen functions are written to accept Map objects just like other %Eigen types. However, when writing your own functions taking %Eigen types, this does \em not happen automatically: a Map type is not identical to its Dense equivalent. See \ref TopicFunctionTakingEigenTypes for details. + +\section TutorialMapPlacementNew Changing the mapped array + +It is possible to change the array of a Map object after declaration, using the C++ "placement new" syntax: + + + + + +
Example:Output:
\include Map_placement_new.cpp \verbinclude Map_placement_new.out
+Despite appearances, this does not invoke the memory allocator, because the syntax specifies the location for storing the result. + +This syntax makes it possible to declare a Map object without first knowing the mapped array's location in memory: +\code +Map A(NULL); // don't try to use this matrix yet! +VectorXf b(n_matrices); +for (int i = 0; i < n_matrices; i++) +{ + new (&A) Map(get_matrix_pointer(i)); + b(i) = A.trace(); +} +\endcode + +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialMatrixArithmetic.dox b/thirdparty/eigen/doc/TutorialMatrixArithmetic.dox new file mode 100644 index 000000000..5fc569a30 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialMatrixArithmetic.dox @@ -0,0 +1,214 @@ +namespace Eigen { + +/** \eigenManualPage TutorialMatrixArithmetic Matrix and vector arithmetic + +This page aims to provide an overview and some details on how to perform arithmetic +between matrices, vectors and scalars with Eigen. + +\eigenAutoToc + +\section TutorialArithmeticIntroduction Introduction + +Eigen offers matrix/vector arithmetic operations either through overloads of common C++ arithmetic operators such as +, -, *, +or through special methods such as dot(), cross(), etc. +For the Matrix class (matrices and vectors), operators are only overloaded to support +linear-algebraic operations. For example, \c matrix1 \c * \c matrix2 means matrix-matrix product, +and \c vector \c + \c scalar is just not allowed. If you want to perform all kinds of array operations, +not linear algebra, see the \ref TutorialArrayClass "next page". + +\section TutorialArithmeticAddSub Addition and subtraction + +The left hand side and right hand side must, of course, have the same numbers of rows and of columns. They must +also have the same \c Scalar type, as Eigen doesn't do automatic type promotion. 
The operators at hand here are: +\li binary operator + as in \c a+b +\li binary operator - as in \c a-b +\li unary operator - as in \c -a +\li compound operator += as in \c a+=b +\li compound operator -= as in \c a-=b + + + + +
Example:Output:
+\include tut_arithmetic_add_sub.cpp + +\verbinclude tut_arithmetic_add_sub.out +
+ +\section TutorialArithmeticScalarMulDiv Scalar multiplication and division + +Multiplication and division by a scalar is very simple too. The operators at hand here are: +\li binary operator * as in \c matrix*scalar +\li binary operator * as in \c scalar*matrix +\li binary operator / as in \c matrix/scalar +\li compound operator *= as in \c matrix*=scalar +\li compound operator /= as in \c matrix/=scalar + + + + +
Example:Output:
+\include tut_arithmetic_scalar_mul_div.cpp + +\verbinclude tut_arithmetic_scalar_mul_div.out +
+ + +\section TutorialArithmeticMentionXprTemplates A note about expression templates + +This is an advanced topic that we explain on \ref TopicEigenExpressionTemplates "this page", +but it is useful to just mention it now. In Eigen, arithmetic operators such as \c operator+ don't +perform any computation by themselves, they just return an "expression object" describing the computation to be +performed. The actual computation happens later, when the whole expression is evaluated, typically in \c operator=. +While this might sound heavy, any modern optimizing compiler is able to optimize away that abstraction and +the result is perfectly optimized code. For example, when you do: +\code +VectorXf a(50), b(50), c(50), d(50); +... +a = 3*b + 4*c + 5*d; +\endcode +Eigen compiles it to just one for loop, so that the arrays are traversed only once. Simplifying (e.g. ignoring +SIMD optimizations), this loop looks like this: +\code +for(int i = 0; i < 50; ++i) + a[i] = 3*b[i] + 4*c[i] + 5*d[i]; +\endcode +Thus, you should not be afraid of using relatively large arithmetic expressions with Eigen: it only gives Eigen +more opportunities for optimization. + +\section TutorialArithmeticTranspose Transposition and conjugation + +The transpose \f$ a^T \f$, conjugate \f$ \bar{a} \f$, and adjoint (i.e., conjugate transpose) \f$ a^* \f$ of a matrix or vector \f$ a \f$ are obtained by the member functions \link DenseBase::transpose() transpose()\endlink, \link MatrixBase::conjugate() conjugate()\endlink, and \link MatrixBase::adjoint() adjoint()\endlink, respectively. + + + + +
Example:Output:
+\include tut_arithmetic_transpose_conjugate.cpp + +\verbinclude tut_arithmetic_transpose_conjugate.out +
+ +For real matrices, \c conjugate() is a no-operation, and so \c adjoint() is equivalent to \c transpose(). + +As for basic arithmetic operators, \c transpose() and \c adjoint() simply return a proxy object without doing the actual transposition. If you do b = a.transpose(), then the transpose is evaluated at the same time as the result is written into \c b. However, there is a complication here. If you do a = a.transpose(), then Eigen starts writing the result into \c a before the evaluation of the transpose is finished. Therefore, the instruction a = a.transpose() does not replace \c a with its transpose, as one would expect: + + + +
Example:Output:
+\include tut_arithmetic_transpose_aliasing.cpp + +\verbinclude tut_arithmetic_transpose_aliasing.out +
+This is the so-called \ref TopicAliasing "aliasing issue". In "debug mode", i.e., when \ref TopicAssertions "assertions" have not been disabled, such common pitfalls are automatically detected. + +For \em in-place transposition, as for instance in a = a.transpose(), simply use the \link DenseBase::transposeInPlace() transposeInPlace()\endlink function: + + + +
Example:Output:
+\include tut_arithmetic_transpose_inplace.cpp + +\verbinclude tut_arithmetic_transpose_inplace.out +
+There is also the \link MatrixBase::adjointInPlace() adjointInPlace()\endlink function for complex matrices. + +\section TutorialArithmeticMatrixMul Matrix-matrix and matrix-vector multiplication + +Matrix-matrix multiplication is again done with \c operator*. Since vectors are a special +case of matrices, they are implicitly handled there too, so matrix-vector product is really just a special +case of matrix-matrix product, and so is vector-vector outer product. Thus, all these cases are handled by just +two operators: +\li binary operator * as in \c a*b +\li compound operator *= as in \c a*=b (this multiplies on the right: \c a*=b is equivalent to a = a*b) + + + + +
Example:Output:
+\include tut_arithmetic_matrix_mul.cpp + +\verbinclude tut_arithmetic_matrix_mul.out +
+ +Note: if you read the above paragraph on expression templates and are worried that doing \c m=m*m might cause +aliasing issues, be reassured for now: Eigen treats matrix multiplication as a special case and takes care of +introducing a temporary here, so it will compile \c m=m*m as: +\code +tmp = m*m; +m = tmp; +\endcode +If you know your matrix product can be safely evaluated into the destination matrix without aliasing issue, then you can use the \link MatrixBase::noalias() noalias()\endlink function to avoid the temporary, e.g.: +\code +c.noalias() += a * b; +\endcode +For more details on this topic, see the page on \ref TopicAliasing "aliasing". + +\b Note: for BLAS users worried about performance, expressions such as c.noalias() -= 2 * a.adjoint() * b; are fully optimized and trigger a single gemm-like function call. + +\section TutorialArithmeticDotAndCross Dot product and cross product + +For dot product and cross product, you need the \link MatrixBase::dot() dot()\endlink and \link MatrixBase::cross() cross()\endlink methods. Of course, the dot product can also be obtained as a 1x1 matrix as u.adjoint()*v. + + + +
Example:Output:
+\include tut_arithmetic_dot_cross.cpp + +\verbinclude tut_arithmetic_dot_cross.out +
+ +Remember that cross product is only for vectors of size 3. Dot product is for vectors of any size. +When using complex numbers, Eigen's dot product is conjugate-linear in the first variable and linear in the +second variable. + +\section TutorialArithmeticRedux Basic arithmetic reduction operations +Eigen also provides some reduction operations to reduce a given matrix or vector to a single value such as the sum (computed by \link DenseBase::sum() sum()\endlink), product (\link DenseBase::prod() prod()\endlink), or the maximum (\link DenseBase::maxCoeff() maxCoeff()\endlink) and minimum (\link DenseBase::minCoeff() minCoeff()\endlink) of all its coefficients. + + + + +
Example:Output:
+\include tut_arithmetic_redux_basic.cpp + +\verbinclude tut_arithmetic_redux_basic.out +
+ +The \em trace of a matrix, as returned by the function \link MatrixBase::trace() trace()\endlink, is the sum of the diagonal coefficients and can also be computed as efficiently using a.diagonal().sum(), as we will see later on. + +There also exist variants of the \c minCoeff and \c maxCoeff functions returning the coordinates of the respective coefficient via the arguments: + + + + +
Example:Output:
+\include tut_arithmetic_redux_minmax.cpp + +\verbinclude tut_arithmetic_redux_minmax.out +
+ + +\section TutorialArithmeticValidity Validity of operations +Eigen checks the validity of the operations that you perform. When possible, +it checks them at compile time, producing compilation errors. These error messages can be long and ugly, +but Eigen writes the important message in UPPERCASE_LETTERS_SO_IT_STANDS_OUT. For example: +\code + Matrix3f m; + Vector4f v; + v = m*v; // Compile-time error: YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES +\endcode + +Of course, in many cases, for example when checking dynamic sizes, the check cannot be performed at compile time. +Eigen then uses runtime assertions. This means that the program will abort with an error message when executing an illegal operation if it is run in "debug mode", and it will probably crash if assertions are turned off. + +\code + MatrixXf m(3,3); + VectorXf v(4); + v = m * v; // Run-time assertion failure here: "invalid matrix product" +\endcode + +For more details on this topic, see \ref TopicAssertions "this page". + +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialMatrixClass.dox b/thirdparty/eigen/doc/TutorialMatrixClass.dox new file mode 100644 index 000000000..7ea0cd789 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialMatrixClass.dox @@ -0,0 +1,265 @@ +namespace Eigen { + +/** \eigenManualPage TutorialMatrixClass The Matrix class + +\eigenAutoToc + +In Eigen, all matrices and vectors are objects of the Matrix template class. +Vectors are just a special case of matrices, with either 1 row or 1 column. + +\section TutorialMatrixFirst3Params The first three template parameters of Matrix + +The Matrix class takes six template parameters, but for now it's enough to +learn about the first three parameters. The three remaining parameters have default +values, which for now we will leave untouched, and which we +\ref TutorialMatrixOptTemplParams "discuss below". + +The three mandatory template parameters of Matrix are: +\code +Matrix +\endcode +\li \c Scalar is the scalar type, i.e. 
the type of the coefficients. + That is, if you want a matrix of floats, choose \c float here. + See \ref TopicScalarTypes "Scalar types" for a list of all supported + scalar types and for how to extend support to new types. +\li \c RowsAtCompileTime and \c ColsAtCompileTime are the number of rows + and columns of the matrix as known at compile time (see + \ref TutorialMatrixDynamic "below" for what to do if the number is not + known at compile time). + +We offer a lot of convenience typedefs to cover the usual cases. For example, \c Matrix4f is +a 4x4 matrix of floats. Here is how it is defined by Eigen: +\code +typedef Matrix Matrix4f; +\endcode +We discuss \ref TutorialMatrixTypedefs "below" these convenience typedefs. + +\section TutorialMatrixVectors Vectors + +As mentioned above, in Eigen, vectors are just a special case of +matrices, with either 1 row or 1 column. The case where they have 1 column is the most common; +such vectors are called column-vectors, often abbreviated as just vectors. In the other case +where they have 1 row, they are called row-vectors. + +For example, the convenience typedef \c Vector3f is a (column) vector of 3 floats. It is defined as follows by Eigen: +\code +typedef Matrix Vector3f; +\endcode +We also offer convenience typedefs for row-vectors, for example: +\code +typedef Matrix RowVector2i; +\endcode + +\section TutorialMatrixDynamic The special value Dynamic + +Of course, Eigen is not limited to matrices whose dimensions are known at compile time. +The \c RowsAtCompileTime and \c ColsAtCompileTime template parameters can take the special +value \c Dynamic which indicates that the size is unknown at compile time, so must +be handled as a run-time variable. In Eigen terminology, such a size is referred to as a +\em dynamic \em size; while a size that is known at compile time is called a +\em fixed \em size. 
For example, the convenience typedef \c MatrixXd, meaning +a matrix of doubles with dynamic size, is defined as follows: +\code +typedef Matrix MatrixXd; +\endcode +And similarly, we define a self-explanatory typedef \c VectorXi as follows: +\code +typedef Matrix VectorXi; +\endcode +You can perfectly have e.g. a fixed number of rows with a dynamic number of columns, as in: +\code +Matrix +\endcode + +\section TutorialMatrixConstructors Constructors + +A default constructor is always available, never performs any dynamic memory allocation, and never initializes the matrix coefficients. You can do: +\code +Matrix3f a; +MatrixXf b; +\endcode +Here, +\li \c a is a 3-by-3 matrix, with a plain float[9] array of uninitialized coefficients, +\li \c b is a dynamic-size matrix whose size is currently 0-by-0, and whose array of +coefficients hasn't yet been allocated at all. + +Constructors taking sizes are also available. For matrices, the number of rows is always passed first. +For vectors, just pass the vector size. They allocate the array of coefficients +with the given size, but don't initialize the coefficients themselves: +\code +MatrixXf a(10,15); +VectorXf b(30); +\endcode +Here, +\li \c a is a 10x15 dynamic-size matrix, with allocated but currently uninitialized coefficients. +\li \c b is a dynamic-size vector of size 30, with allocated but currently uninitialized coefficients. + +In order to offer a uniform API across fixed-size and dynamic-size matrices, it is legal to use these +constructors on fixed-size matrices, even if passing the sizes is useless in this case. So this is legal: +\code +Matrix3f a(3,3); +\endcode +and is a no-operation. 
+ +Finally, we also offer some constructors to initialize the coefficients of small fixed-size vectors up to size 4: +\code +Vector2d a(5.0, 6.0); +Vector3d b(5.0, 6.0, 7.0); +Vector4d c(5.0, 6.0, 7.0, 8.0); +\endcode + +\section TutorialMatrixCoeffAccessors Coefficient accessors + +The primary coefficient accessors and mutators in Eigen are the overloaded parenthesis operators. +For matrices, the row index is always passed first. For vectors, just pass one index. +The numbering starts at 0. This example is self-explanatory: + + + + +
Example:Output:
+\include tut_matrix_coefficient_accessors.cpp + +\verbinclude tut_matrix_coefficient_accessors.out +
+ +Note that the syntax m(index) +is not restricted to vectors, it is also available for general matrices, meaning index-based access +in the array of coefficients. This however depends on the matrix's storage order. All Eigen matrices default to +column-major storage order, but this can be changed to row-major, see \ref TopicStorageOrders "Storage orders". + +The operator[] is also overloaded for index-based access in vectors, but keep in mind that C++ doesn't allow operator[] to +take more than one argument. We restrict operator[] to vectors, because an awkwardness in the C++ language +would make matrix[i,j] compile to the same thing as matrix[j] ! + +\section TutorialMatrixCommaInitializer Comma-initialization + +%Matrix and vector coefficients can be conveniently set using the so-called \em comma-initializer syntax. +For now, it is enough to know this example: + + + + + + +
Example:Output:
\include Tutorial_commainit_01.cpp \verbinclude Tutorial_commainit_01.out
+ + +The right-hand side can also contain matrix expressions as discussed in \ref TutorialAdvancedInitialization "this page". + +\section TutorialMatrixSizesResizing Resizing + +The current size of a matrix can be retrieved by \link EigenBase::rows() rows()\endlink, \link EigenBase::cols() cols() \endlink and \link EigenBase::size() size()\endlink. These methods return the number of rows, the number of columns and the number of coefficients, respectively. Resizing a dynamic-size matrix is done by the \link PlainObjectBase::resize(Index,Index) resize() \endlink method. + + + + + + +
Example:Output:
\include tut_matrix_resize.cpp \verbinclude tut_matrix_resize.out
+ +The resize() method is a no-operation if the actual matrix size doesn't change; otherwise it is destructive: the values of the coefficients may change. +If you want a conservative variant of resize() which does not change the coefficients, use \link PlainObjectBase::conservativeResize() conservativeResize()\endlink, see \ref TopicResizing "this page" for more details. + +All these methods are still available on fixed-size matrices, for the sake of API uniformity. Of course, you can't actually +resize a fixed-size matrix. Trying to change a fixed size to an actually different value will trigger an assertion failure; +but the following code is legal: + + + + + + +
Example:Output:
\include tut_matrix_resize_fixed_size.cpp \verbinclude tut_matrix_resize_fixed_size.out
+ + +\section TutorialMatrixAssignment Assignment and resizing + +Assignment is the action of copying a matrix into another, using \c operator=. Eigen resizes the matrix on the left-hand side automatically so that it matches the size of the matrix on the right-hand size. For example: + + + + + + +
Example:Output:
\include tut_matrix_assignment_resizing.cpp \verbinclude tut_matrix_assignment_resizing.out
+ +Of course, if the left-hand side is of fixed size, resizing it is not allowed. + +If you do not want this automatic resizing to happen (for example for debugging purposes), you can disable it, see +\ref TopicResizing "this page". + + +\section TutorialMatrixFixedVsDynamic Fixed vs. Dynamic size + +When should one use fixed sizes (e.g. \c Matrix4f), and when should one prefer dynamic sizes (e.g. \c MatrixXf)? +The simple answer is: use fixed +sizes for very small sizes where you can, and use dynamic sizes for larger sizes or where you have to. For small sizes, +especially for sizes smaller than (roughly) 16, using fixed sizes is hugely beneficial +to performance, as it allows Eigen to avoid dynamic memory allocation and to unroll +loops. Internally, a fixed-size Eigen matrix is just a plain array, i.e. doing +\code Matrix4f mymatrix; \endcode +really amounts to just doing +\code float mymatrix[16]; \endcode +so this really has zero runtime cost. By contrast, the array of a dynamic-size matrix +is always allocated on the heap, so doing +\code MatrixXf mymatrix(rows,columns); \endcode +amounts to doing +\code float *mymatrix = new float[rows*columns]; \endcode +and in addition to that, the MatrixXf object stores its number of rows and columns as +member variables. + +The limitation of using fixed sizes, of course, is that this is only possible +when you know the sizes at compile time. Also, for large enough sizes, say for sizes +greater than (roughly) 32, the performance benefit of using fixed sizes becomes negligible. +Worse, trying to create a very large matrix using fixed sizes inside a function could result in a +stack overflow, since Eigen will try to allocate the array automatically as a local variable, and +this is normally done on the stack. +Finally, depending on circumstances, Eigen can also be more aggressive trying to vectorize +(use SIMD instructions) when dynamic sizes are used, see \ref TopicVectorization "Vectorization". 
+ +\section TutorialMatrixOptTemplParams Optional template parameters + +We mentioned at the beginning of this page that the Matrix class takes six template parameters, +but so far we only discussed the first three. The remaining three parameters are optional. Here is +the complete list of template parameters: +\code +Matrix +\endcode +\li \c Options is a bit field. Here, we discuss only one bit: \c RowMajor. It specifies that the matrices + of this type use row-major storage order; by default, the storage order is column-major. See the page on + \ref TopicStorageOrders "storage orders". For example, this type means row-major 3x3 matrices: + \code + Matrix + \endcode +\li \c MaxRowsAtCompileTime and \c MaxColsAtCompileTime are useful when you want to specify that, even though + the exact sizes of your matrices are not known at compile time, a fixed upper bound is known at + compile time. The biggest reason why you might want to do that is to avoid dynamic memory allocation. + For example the following matrix type uses a plain array of 12 floats, without dynamic memory allocation: + \code + Matrix + \endcode + +\section TutorialMatrixTypedefs Convenience typedefs + +Eigen defines the following Matrix typedefs: +\li MatrixNt for Matrix. For example, MatrixXi for Matrix. +\li VectorNt for Matrix. For example, Vector2f for Matrix. +\li RowVectorNt for Matrix. For example, RowVector3d for Matrix. + +Where: +\li N can be any one of \c 2, \c 3, \c 4, or \c X (meaning \c Dynamic). +\li t can be any one of \c i (meaning int), \c f (meaning float), \c d (meaning double), + \c cf (meaning complex), or \c cd (meaning complex). The fact that typedefs are only + defined for these five types doesn't mean that they are the only supported scalar types. For example, + all standard integer types are supported, see \ref TopicScalarTypes "Scalar types". 
+ + +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialReductionsVisitorsBroadcasting.dox b/thirdparty/eigen/doc/TutorialReductionsVisitorsBroadcasting.dox new file mode 100644 index 000000000..f5322b4a6 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialReductionsVisitorsBroadcasting.dox @@ -0,0 +1,266 @@ +namespace Eigen { + +/** \eigenManualPage TutorialReductionsVisitorsBroadcasting Reductions, visitors and broadcasting + +This page explains Eigen's reductions, visitors and broadcasting and how they are used with +\link MatrixBase matrices \endlink and \link ArrayBase arrays \endlink. + +\eigenAutoToc + +\section TutorialReductionsVisitorsBroadcastingReductions Reductions +In Eigen, a reduction is a function taking a matrix or array, and returning a single +scalar value. One of the most used reductions is \link DenseBase::sum() .sum() \endlink, +returning the sum of all the coefficients inside a given matrix or array. + + + + +
Example:Output:
+\include tut_arithmetic_redux_basic.cpp + +\verbinclude tut_arithmetic_redux_basic.out +
+ +The \em trace of a matrix, as returned by the function \c trace(), is the sum of the diagonal coefficients and can equivalently be computed as a.diagonal().sum(). + + +\subsection TutorialReductionsVisitorsBroadcastingReductionsNorm Norm computations + +The (Euclidean a.k.a. \f$\ell^2\f$) squared norm of a vector can be obtained with \link MatrixBase::squaredNorm() squaredNorm() \endlink. It is equal to the dot product of the vector by itself, and equivalently to the sum of squared absolute values of its coefficients. + +Eigen also provides the \link MatrixBase::norm() norm() \endlink method, which returns the square root of \link MatrixBase::squaredNorm() squaredNorm() \endlink. + +These operations can also operate on matrices; in that case, an n-by-p matrix is seen as a vector of size (n*p), so for example the \link MatrixBase::norm() norm() \endlink method returns the "Frobenius" or "Hilbert-Schmidt" norm. We refrain from speaking of the \f$\ell^2\f$ norm of a matrix because that can mean different things. + +If you want other coefficient-wise \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm lpNorm<p>() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients. + +The following example demonstrates these methods. + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.out +
+ +\b Operator \b norm: The 1-norm and \f$\infty\f$-norm matrix operator norms can easily be computed as follows: + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.out +
+See below for more explanations on the syntax of these expressions. + +\subsection TutorialReductionsVisitorsBroadcastingReductionsBool Boolean reductions + +The following reductions operate on boolean values: + - \link DenseBase::all() all() \endlink returns \b true if all of the coefficients in a given Matrix or Array evaluate to \b true . + - \link DenseBase::any() any() \endlink returns \b true if at least one of the coefficients in a given Matrix or Array evaluates to \b true . + - \link DenseBase::count() count() \endlink returns the number of coefficients in a given Matrix or Array that evaluate to \b true. + +These are typically used in conjunction with the coefficient-wise comparison and equality operators provided by Array. For instance, array > 0 is an %Array of the same size as \c array , with \b true at those positions where the corresponding coefficient of \c array is positive. Thus, (array > 0).all() tests whether all coefficients of \c array are positive. This can be seen in the following example: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.out +
+ +\subsection TutorialReductionsVisitorsBroadcastingReductionsUserdefined User defined reductions + +TODO + +In the meantime you can have a look at the DenseBase::redux() function. + +\section TutorialReductionsVisitorsBroadcastingVisitors Visitors +Visitors are useful when one wants to obtain the location of a coefficient inside +a Matrix or Array. The simplest examples are +\link MatrixBase::maxCoeff() maxCoeff(&x,&y) \endlink and +\link MatrixBase::minCoeff() minCoeff(&x,&y)\endlink, which can be used to find +the location of the greatest or smallest coefficient in a Matrix or +Array. + +The arguments passed to a visitor are pointers to the variables where the +row and column position are to be stored. These variables should be of type +\link Eigen::Index Index \endlink, as shown below: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_visitors.out +
+ +Both functions also return the value of the minimum or maximum coefficient. + +\section TutorialReductionsVisitorsBroadcastingPartialReductions Partial reductions +Partial reductions are reductions that can operate column- or row-wise on a Matrix or +Array, applying the reduction operation on each column or row and +returning a column or row vector with the corresponding values. Partial reductions are applied +with \link DenseBase::colwise() colwise() \endlink or \link DenseBase::rowwise() rowwise() \endlink. + +A simple example is obtaining the maximum of the elements +in each column in a given matrix, storing the result in a row vector: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_colwise.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_colwise.out +
+ +The same operation can be performed row-wise: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_rowwise.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_rowwise.out +
+ +Note that column-wise operations return a row vector, while row-wise operations return a column vector. + +\subsection TutorialReductionsVisitorsBroadcastingPartialReductionsCombined Combining partial reductions with other operations +It is also possible to use the result of a partial reduction to do further processing. +Here is another example that finds the column whose sum of elements is the maximum + within a matrix. With column-wise partial reductions this can be coded as: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_maxnorm.out +
+ +The previous example applies the \link DenseBase::sum() sum() \endlink reduction on each column +through the \link DenseBase::colwise() colwise() \endlink visitor, obtaining a new matrix whose +size is 1x4. + +Therefore, if +\f[ +\mbox{m} = \begin{bmatrix} 1 & 2 & 6 & 9 \\ + 3 & 1 & 7 & 2 \end{bmatrix} +\f] + +then + +\f[ +\mbox{m.colwise().sum()} = \begin{bmatrix} 4 & 3 & 13 & 11 \end{bmatrix} +\f] + +The \link DenseBase::maxCoeff() maxCoeff() \endlink reduction is finally applied +to obtain the column index where the maximum sum is found, +which is the column index 2 (third column) in this case. + + +\section TutorialReductionsVisitorsBroadcastingBroadcasting Broadcasting +The concept behind broadcasting is similar to partial reductions, with the difference that broadcasting +constructs an expression where a vector (column or row) is interpreted as a matrix by replicating it in +one direction. + +A simple example is to add a certain column vector to each column in a matrix. +This can be accomplished with: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple.out +
+ +We can interpret the instruction mat.colwise() += v in two equivalent ways. It adds the vector \c v +to every column of the matrix. Alternatively, it can be interpreted as repeating the vector \c v four times to +form a two-by-four matrix which is then added to \c mat: +\f[ +\begin{bmatrix} 1 & 2 & 6 & 9 \\ 3 & 1 & 7 & 2 \end{bmatrix} ++ \begin{bmatrix} 0 & 0 & 0 & 0 \\ 1 & 1 & 1 & 1 \end{bmatrix} += \begin{bmatrix} 1 & 2 & 6 & 9 \\ 4 & 2 & 8 & 3 \end{bmatrix}. +\f] +The operators -=, + and - can also be used column-wise and row-wise. On arrays, we +can also use the operators *=, /=, * and / to perform coefficient-wise +multiplication and division column-wise or row-wise. These operators are not available on matrices because it +is not clear what they would do. If you want to multiply column 0 of a matrix \c mat with \c v(0), column 1 with +\c v(1), and so on, then use mat = mat * v.asDiagonal(). + +It is important to point out that the vector to be added column-wise or row-wise must be of type Vector, +and cannot be a Matrix. If this is not met then you will get a compile-time error. This also means that +broadcasting operations can only be applied with an object of type Vector, when operating with Matrix. +The same applies for the Array class, where the equivalent for VectorXf is ArrayXf. As always, you should +not mix arrays and matrices in the same expression. + +To perform the same operation row-wise we can do: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple_rowwise.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple_rowwise.out +
+ +\subsection TutorialReductionsVisitorsBroadcastingBroadcastingCombined Combining broadcasting with other operations +Broadcasting can also be combined with other operations, such as Matrix or Array operations, +reductions and partial reductions. + +Now that broadcasting, reductions and partial reductions have been introduced, we can dive into a more advanced example that finds +the nearest neighbour of a vector v within the columns of matrix m. The Euclidean distance will be used in this example, +computing the squared Euclidean distance with the partial reduction named \link MatrixBase::squaredNorm() squaredNorm() \endlink: + + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.out +
+ +The line that does the job is +\code + (m.colwise() - v).colwise().squaredNorm().minCoeff(&index); +\endcode + +We will go step by step to understand what is happening: + + - m.colwise() - v is a broadcasting operation, subtracting v from each column in m. The result of this operation +is a new matrix whose size is the same as matrix m: \f[ + \mbox{m.colwise() - v} = + \begin{bmatrix} + -1 & 21 & 4 & 7 \\ + 0 & 8 & 4 & -1 + \end{bmatrix} +\f] + + - (m.colwise() - v).colwise().squaredNorm() is a partial reduction, computing the squared norm column-wise. The result of +this operation is a row vector where each coefficient is the squared Euclidean distance between each column in m and v: \f[ + \mbox{(m.colwise() - v).colwise().squaredNorm()} = + \begin{bmatrix} + 1 & 505 & 32 & 50 + \end{bmatrix} +\f] + + - Finally, minCoeff(&index) is used to obtain the index of the column in m that is closest to v in terms of Euclidean +distance. + +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialReshapeSlicing.dox b/thirdparty/eigen/doc/TutorialReshapeSlicing.dox new file mode 100644 index 000000000..3730a5de6 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialReshapeSlicing.dox @@ -0,0 +1,65 @@ +namespace Eigen { + +/** \eigenManualPage TutorialReshapeSlicing Reshape and Slicing + +%Eigen does not expose convenient methods to take slices or to reshape a matrix yet. +Nonetheless, such features can easily be emulated using the Map class. + +\eigenAutoToc + +\section TutorialReshape Reshape + +A reshape operation consists in modifying the sizes of a matrix while keeping the same coefficients. +Instead of modifying the input matrix itself, which is not possible for compile-time sizes, the approach consist in creating a different \em view on the storage using class Map. +Here is a typical example creating a 1D linear view of a matrix: + + + + +
Example:Output:
+\include Tutorial_ReshapeMat2Vec.cpp + +\verbinclude Tutorial_ReshapeMat2Vec.out +
+ +Remark how the storage order of the input matrix modifies the order of the coefficients in the linear view. +Here is another example reshaping a 2x6 matrix to a 6x2 one: + + + +
Example:Output:
+\include Tutorial_ReshapeMat2Mat.cpp + +\verbinclude Tutorial_ReshapeMat2Mat.out +
+ + + +\section TutorialSlicing Slicing + +Slicing consists in taking a set of rows, columns, or elements, uniformly spaced within a matrix. +Again, the class Map allows to easily mimic this feature. + +For instance, one can skip every P elements in a vector: + + + +
Example:Output:
+\include Tutorial_SlicingVec.cpp + +\verbinclude Tutorial_SlicingVec.out +
+ +One can also take one column out of three using an adequate outer-stride or inner-stride depending on the actual storage order: + + + +
Example:Output:
+\include Tutorial_SlicingCol.cpp + +\verbinclude Tutorial_SlicingCol.out +
+ +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialSparse.dox b/thirdparty/eigen/doc/TutorialSparse.dox new file mode 100644 index 000000000..352907408 --- /dev/null +++ b/thirdparty/eigen/doc/TutorialSparse.dox @@ -0,0 +1,365 @@ +namespace Eigen { + +/** \eigenManualPage TutorialSparse Sparse matrix manipulations + +\eigenAutoToc + +Manipulating and solving sparse problems involves various modules which are summarized below: + + + + + + + + + + +
ModuleHeader fileContents
\link SparseCore_Module SparseCore \endlink\code#include \endcodeSparseMatrix and SparseVector classes, matrix assembly, basic sparse linear algebra (including sparse triangular solvers)
\link SparseCholesky_Module SparseCholesky \endlink\code#include \endcodeDirect sparse LLT and LDLT Cholesky factorization to solve sparse self-adjoint positive definite problems
\link SparseLU_Module SparseLU \endlink\code #include \endcode%Sparse LU factorization to solve general square sparse systems
\link SparseQR_Module SparseQR \endlink\code #include\endcode %Sparse QR factorization for solving sparse linear least-squares problems
\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlink\code#include \endcodeIterative solvers to solve large general linear square problems (including self-adjoint positive definite problems)
\link Sparse_Module Sparse \endlink\code#include \endcodeIncludes all the above modules
+ +\section TutorialSparseIntro Sparse matrix format + +In many applications (e.g., finite element methods) it is common to deal with very large matrices where only a few coefficients are different from zero. In such cases, memory consumption can be reduced and performance increased by using a specialized representation storing only the nonzero coefficients. Such a matrix is called a sparse matrix. + +\b The \b %SparseMatrix \b class + +The class SparseMatrix is the main sparse matrix representation of Eigen's sparse module; it offers high performance and low memory usage. +It implements a more versatile variant of the widely-used Compressed Column (or Row) Storage scheme. +It consists of four compact arrays: + - \c Values: stores the coefficient values of the non-zeros. + - \c InnerIndices: stores the row (resp. column) indices of the non-zeros. + - \c OuterStarts: stores for each column (resp. row) the index of the first non-zero in the previous two arrays. + - \c InnerNNZs: stores the number of non-zeros of each column (resp. row). +The word \c inner refers to an \em inner \em vector that is a column for a column-major matrix, or a row for a row-major matrix. +The word \c outer refers to the other direction. + +This storage scheme is better explained on an example. The following matrix + + + + + + +
03 00 0
220 0017
75 01 0
00 00 0
00140 8
+ +and one of its possible sparse, \b column \b major representation: + + + +
Values: 227_3514__1_178
InnerIndices: 12_02 4__2_ 14
+ + + +
OuterStarts:035810\em 12
InnerNNZs: 2211 2
+ +Currently the elements of a given inner vector are guaranteed to be always sorted by increasing inner indices. +The \c "_" indicates available free space to quickly insert new elements. +Assuming no reallocation is needed, the insertion of a random element is therefore in O(nnz_j) where nnz_j is the number of nonzeros of the respective inner vector. +On the other hand, inserting elements with increasing inner indices in a given inner vector is much more efficient since this only requires increasing the respective \c InnerNNZs entry, which is an O(1) operation. + +The case where no empty space is available is a special case, and is referred to as the \em compressed mode. +It corresponds to the widely used Compressed Column (or Row) Storage schemes (CCS or CRS). +Any SparseMatrix can be turned to this form by calling the SparseMatrix::makeCompressed() function. +In this case, one can remark that the \c InnerNNZs array is redundant with \c OuterStarts because we have the equality: \c InnerNNZs[j] = \c OuterStarts[j+1]-\c OuterStarts[j]. +Therefore, in practice a call to SparseMatrix::makeCompressed() frees this buffer. + +It is worth noting that most of our wrappers to external libraries require compressed matrices as inputs. + +The results of %Eigen's operations always produce \b compressed sparse matrices. +On the other hand, the insertion of a new element into a SparseMatrix converts the latter to the \b uncompressed mode. + +Here is the previous matrix represented in compressed mode: + + + +
Values: 22735141178
InnerIndices: 1202 42 14
+ + +
OuterStarts:02456\em 8
+ +A SparseVector is a special case of a SparseMatrix where only the \c Values and \c InnerIndices arrays are stored. +There is no notion of compressed/uncompressed mode for a SparseVector. + + +\section TutorialSparseExample First example + +Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \Delta u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. +Such problem can be mathematically expressed as a linear problem of the form \f$ Ax=b \f$ where \f$ x \f$ is the vector of \c m unknowns (in our case, the values of the pixels), \f$ b \f$ is the right hand side vector resulting from the boundary conditions, and \f$ A \f$ is an \f$ m \times m \f$ matrix containing only a few non-zero elements resulting from the discretization of the Laplacian operator. + + + +
+\include Tutorial_sparse_example.cpp + +\image html Tutorial_sparse_example.jpeg +
+ +In this example, we start by defining a column-major sparse matrix type of double \c SparseMatrix, and a triplet list of the same scalar type \c Triplet. A triplet is a simple object representing a non-zero entry as the triplet: \c row index, \c column index, \c value. + +In the main function, we declare a list \c coefficients of triplets (as a std vector) and the right hand side vector \f$ b \f$ which are filled by the \a buildProblem function. +The raw and flat list of non-zero entries is then converted to a true SparseMatrix object \c A. +Note that the elements of the list do not have to be sorted, and possible duplicate entries will be summed up. + +The last step consists of effectively solving the assembled problem. +Since the resulting matrix \c A is symmetric by construction, we can perform a direct Cholesky factorization via the SimplicialLDLT class which behaves like its LDLT counterpart for dense objects. + +The resulting vector \c x contains the pixel values as a 1D array which is saved to a jpeg file shown on the right of the code above. + +Describing the \a buildProblem and \a save functions is out of the scope of this tutorial. They are given \ref TutorialSparse_example_details "here" for the curious and reproducibility purpose. + + + + +\section TutorialSparseSparseMatrix The SparseMatrix class + +\b %Matrix \b and \b vector \b properties \n + +The SparseMatrix and SparseVector classes take three template arguments: + * the scalar type (e.g., double) + * the storage order (ColMajor or RowMajor, the default is ColMajor) + * the inner index type (default is \c int). + +As for dense Matrix objects, constructors takes the size of the object. 
+Here are some examples: + +\code +SparseMatrix > mat(1000,2000); // declares a 1000x2000 column-major compressed sparse matrix of complex +SparseMatrix mat(1000,2000); // declares a 1000x2000 row-major compressed sparse matrix of double +SparseVector > vec(1000); // declares a column sparse vector of complex of size 1000 +SparseVector vec(1000); // declares a row sparse vector of double of size 1000 +\endcode + +In the rest of the tutorial, \c mat and \c vec represent any sparse-matrix and sparse-vector objects, respectively. + +The dimensions of a matrix can be queried using the following functions: + + + + + + + + + +
Standard \n dimensions\code +mat.rows() +mat.cols()\endcode\code +vec.size() \endcode
Sizes along the \n inner/outer dimensions\code +mat.innerSize() +mat.outerSize()\endcode
Number of non \n zero coefficients\code +mat.nonZeros() \endcode\code +vec.nonZeros() \endcode
+ + +\b Iterating \b over \b the \b nonzero \b coefficients \n + +Random access to the elements of a sparse object can be done through the \c coeffRef(i,j) function. +However, this function involves a quite expensive binary search. +In most cases, one only wants to iterate over the non-zeros elements. This is achieved by a standard loop over the outer dimension, and then by iterating over the non-zeros of the current inner vector via an InnerIterator. Thus, the non-zero entries have to be visited in the same order than the storage order. +Here is an example: + + +
+\code +SparseMatrix mat(rows,cols); +for (int k=0; k::InnerIterator it(mat,k); it; ++it) + { + it.value(); + it.row(); // row index + it.col(); // col index (here it is equal to k) + it.index(); // inner index, here it is equal to it.row() + } +\endcode + +\code +SparseVector vec(size); +for (SparseVector::InnerIterator it(vec); it; ++it) +{ + it.value(); // == vec[ it.index() ] + it.index(); +} +\endcode +
+For a writable expression, the referenced value can be modified using the valueRef() function. +If the type of the sparse matrix or vector depends on a template parameter, then the \c typename keyword is +required to indicate that \c InnerIterator denotes a type; see \ref TopicTemplateKeyword for details. + + +\section TutorialSparseFilling Filling a sparse matrix + +Because of the special storage scheme of a SparseMatrix, special care has to be taken when adding new nonzero entries. +For instance, the cost of a single purely random insertion into a SparseMatrix is \c O(nnz), where \c nnz is the current number of non-zero coefficients. + +The simplest way to create a sparse matrix while guaranteeing good performance is thus to first build a list of so-called \em triplets, and then convert it to a SparseMatrix. + +Here is a typical usage example: +\code +typedef Eigen::Triplet<double> T; +std::vector<T> tripletList; +tripletList.reserve(estimation_of_entries); +for(...) +{ + // ... + tripletList.push_back(T(i,j,v_ij)); +} +SparseMatrixType mat(rows,cols); +mat.setFromTriplets(tripletList.begin(), tripletList.end()); +// mat is ready to go! +\endcode +The \c std::vector of triplets might contain the elements in arbitrary order, and might even contain duplicated elements that will be summed up by setFromTriplets(). +See the SparseMatrix::setFromTriplets() function and class Triplet for more details. + + +In some cases, however, slightly higher performance, and lower memory consumption can be reached by directly inserting the non-zeros into the destination matrix. +A typical scenario of this approach is illustrated below: +\code +1: SparseMatrix<double> mat(rows,cols); // default is column major +2: mat.reserve(VectorXi::Constant(cols,6)); +3: for each i,j such that v_ij != 0 +4: mat.insert(i,j) = v_ij; // alternative: mat.coeffRef(i,j) += v_ij; +5: mat.makeCompressed(); // optional +\endcode + +- The key ingredient here is the line 2 where we reserve room for 6 non-zeros per column. 
In many cases, the number of non-zeros per column or row can easily be known in advance. If it varies significantly for each inner vector, then it is possible to specify a reserve size for each inner vector by providing a vector object with an operator[](int j) returning the reserve size of the \c j-th inner vector (e.g., via a VectorXi or std::vector<int>). If only a rough estimate of the number of nonzeros per inner-vector can be obtained, it is highly recommended to overestimate it rather than the opposite. If this line is omitted, then the first insertion of a new element will reserve room for 2 elements per inner vector. +- The line 4 performs a sorted insertion. In this example, the ideal case is when the \c j-th column is not full and contains non-zeros whose inner-indices are smaller than \c i. In this case, this operation boils down to a trivial O(1) operation. +- When calling insert(i,j) the element \c i \c ,j must not already exist, otherwise use the coeffRef(i,j) method that will allow to, e.g., accumulate values. This method first performs a binary search and finally calls insert(i,j) if the element does not already exist. It is more flexible than insert() but also more costly. +- The line 5 suppresses the remaining empty space and transforms the matrix into a compressed column storage. + + + +\section TutorialSparseFeatureSet Supported operators and functions + +Because of their special storage format, sparse matrices cannot offer the same level of flexibility as dense matrices. +In Eigen's sparse module we chose to expose only the subset of the dense matrix API which can be efficiently implemented. +In the following \em sm denotes a sparse matrix, \em sv a sparse vector, \em dm a dense matrix, and \em dv a dense vector. 
+ +\subsection TutorialSparse_BasicOps Basic operations + +%Sparse expressions support most of the unary and binary coefficient wise operations: +\code +sm1.real() sm1.imag() -sm1 0.5*sm1 +sm1+sm2 sm1-sm2 sm1.cwiseProduct(sm2) +\endcode +However, a strong restriction is that the storage orders must match. For instance, in the following example: +\code +sm4 = sm1 + sm2 + sm3; +\endcode +sm1, sm2, and sm3 must all be row-major or all column-major. +On the other hand, there is no restriction on the target matrix sm4. +For instance, this means that for computing \f$ A^T + A \f$, the matrix \f$ A^T \f$ must be evaluated into a temporary matrix of compatible storage order: +\code +SparseMatrix A, B; +B = SparseMatrix(A.transpose()) + A; +\endcode + +Binary coefficient wise operators can also mix sparse and dense expressions: +\code +sm2 = sm1.cwiseProduct(dm1); +dm2 = sm1 + dm1; +dm2 = dm1 - sm1; +\endcode +Performance-wise, adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing dm2 = sm1 + dm1, better write: +\code +dm2 = dm1; +dm2 += sm1; +\endcode +This version has the advantage of fully exploiting the higher performance of dense storage (no indirection, SIMD, etc.), and of paying the cost of slow sparse evaluation on the few non-zeros of the sparse matrix only. + + +%Sparse expressions also support transposition: +\code +sm1 = sm2.transpose(); +sm1 = sm2.adjoint(); +\endcode +However, there is no transposeInPlace() method. + + +\subsection TutorialSparse_Products Matrix products + +%Eigen supports various kinds of sparse matrix products which are summarized below: + - \b sparse-dense: + \code +dv2 = sm1 * dv1; +dm2 = dm1 * sm1.adjoint(); +dm2 = 2. * sm1 * dm1; + \endcode + - \b symmetric \b sparse-dense. 
The product of a sparse symmetric matrix with a dense matrix (or vector) can also be optimized by specifying the symmetry with selfadjointView(): + \code +dm2 = sm1.selfadjointView<>() * dm1; // if all coefficients of A are stored +dm2 = A.selfadjointView<Upper>() * dm1; // if only the upper part of A is stored +dm2 = A.selfadjointView<Lower>() * dm1; // if only the lower part of A is stored + \endcode + - \b sparse-sparse. For sparse-sparse products, two different algorithms are available. The default one is conservative and preserves the explicit zeros that might appear: + \code +sm3 = sm1 * sm2; +sm3 = 4 * sm1.adjoint() * sm2; + \endcode + The second algorithm prunes on the fly the explicit zeros, or the values smaller than a given threshold. It is enabled and controlled through the pruned() functions: + \code +sm3 = (sm1 * sm2).pruned(); // removes numerical zeros +sm3 = (sm1 * sm2).pruned(ref); // removes elements much smaller than ref +sm3 = (sm1 * sm2).pruned(ref,epsilon); // removes elements smaller than ref*epsilon + \endcode + + - \b permutations. Finally, permutations can be applied to sparse matrices too: + \code +PermutationMatrix P = ...; +sm2 = P * sm1; +sm2 = sm1 * P.inverse(); +sm2 = sm1.transpose() * P; + \endcode + + +\subsection TutorialSparse_SubMatrices Block operations + +Regarding read-access, sparse matrices expose the same API as dense matrices to access sub-matrices such as blocks, columns, and rows. See \ref TutorialBlockOperations for a detailed introduction. +However, for performance reasons, writing to a sub-sparse-matrix is much more limited, and currently only contiguous sets of columns (resp. rows) of a column-major (resp. row-major) SparseMatrix are writable. Moreover, this information has to be known at compile-time, leaving out methods such as block(...) and corner*(...). 
The available API for write-access to a SparseMatrix are summarized below: +\code +SparseMatrix sm1; +sm1.col(j) = ...; +sm1.leftCols(ncols) = ...; +sm1.middleCols(j,ncols) = ...; +sm1.rightCols(ncols) = ...; + +SparseMatrix sm2; +sm2.row(i) = ...; +sm2.topRows(nrows) = ...; +sm2.middleRows(i,nrows) = ...; +sm2.bottomRows(nrows) = ...; +\endcode + +In addition, sparse matrices expose the SparseMatrixBase::innerVector() and SparseMatrixBase::innerVectors() methods, which are aliases to the col/middleCols methods for a column-major storage, and to the row/middleRows methods for a row-major storage. + +\subsection TutorialSparse_TriangularSelfadjoint Triangular and selfadjoint views + +Just as with dense matrices, the triangularView() function can be used to address a triangular part of the matrix, and perform triangular solves with a dense right hand side: +\code +dm2 = sm1.triangularView(dm1); +dv2 = sm1.transpose().triangularView(dv1); +\endcode + +The selfadjointView() function permits various operations: + - optimized sparse-dense matrix products: + \code +dm2 = sm1.selfadjointView<>() * dm1; // if all coefficients of A are stored +dm2 = A.selfadjointView() * dm1; // if only the upper part of A is stored +dm2 = A.selfadjointView() * dm1; // if only the lower part of A is stored + \endcode + - copy of triangular parts: + \code +sm2 = sm1.selfadjointView(); // makes a full selfadjoint matrix from the upper triangular part +sm2.selfadjointView() = sm1.selfadjointView(); // copies the upper triangular part to the lower triangular part + \endcode + - application of symmetric permutations: + \code +PermutationMatrix P = ...; +sm2 = A.selfadjointView().twistedBy(P); // compute P S P' from the upper triangular part of A, and make it a full matrix +sm2.selfadjointView() = A.selfadjointView().twistedBy(P); // compute P S P' from the lower triangular part of A, and then only compute the lower part + \endcode + +Please, refer to the \link SparseQuickRefPage Quick Reference 
\endlink guide for the list of supported operations. The list of linear solvers available is \link TopicSparseSystems here. \endlink + +*/ + +} diff --git a/thirdparty/eigen/doc/TutorialSparse_example_details.dox b/thirdparty/eigen/doc/TutorialSparse_example_details.dox new file mode 100644 index 000000000..0438da8bb --- /dev/null +++ b/thirdparty/eigen/doc/TutorialSparse_example_details.dox @@ -0,0 +1,4 @@ +/** +\page TutorialSparse_example_details +\include Tutorial_sparse_example_details.cpp +*/ diff --git a/thirdparty/eigen/doc/UnalignedArrayAssert.dox b/thirdparty/eigen/doc/UnalignedArrayAssert.dox new file mode 100644 index 000000000..95d95a2d5 --- /dev/null +++ b/thirdparty/eigen/doc/UnalignedArrayAssert.dox @@ -0,0 +1,120 @@ +namespace Eigen { + +/** \eigenManualPage TopicUnalignedArrayAssert Explanation of the assertion on unaligned arrays + +Hello! You are seeing this webpage because your program terminated on an assertion failure like this one: +

+my_program: path/to/eigen/Eigen/src/Core/DenseStorage.h:44:
+Eigen::internal::matrix_array::internal::matrix_array()
+[with T = double, int Size = 2, int MatrixOptions = 2, bool Align = true]:
+Assertion `(reinterpret_cast(array) & (sizemask)) == 0 && "this assertion
+is explained here: http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html
+**** READ THIS WEB PAGE !!! ****"' failed.
+
+ +There are 4 known causes for this issue. Please read on to understand them and learn how to fix them. + +\eigenAutoToc + +\section where Where in my own code is the cause of the problem? + +First of all, you need to find out where in your own code this assertion was triggered from. At first glance, the error message doesn't look helpful, as it refers to a file inside Eigen! However, since your program crashed, if you can reproduce the crash, you can get a backtrace using any debugger. For example, if you're using GCC, you can use the GDB debugger as follows: +\code +$ gdb ./my_program # Start GDB on your program +> run # Start running your program +... # Now reproduce the crash! +> bt # Obtain the backtrace +\endcode +Now that you know precisely where in your own code the problem is happening, read on to understand what you need to change. + +\section c1 Cause 1: Structures having Eigen objects as members + +If you have code like this, + +\code +class Foo +{ + //... + Eigen::Vector2d v; + //... +}; +//... +Foo *foo = new Foo; +\endcode + +then you need to read this separate page: \ref TopicStructHavingEigenMembers "Structures Having Eigen Members". + +Note that here, Eigen::Vector2d is only used as an example, more generally the issue arises for all \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types". + +\section c2 Cause 2: STL Containers or manual memory allocation + +If you use STL Containers such as std::vector, std::map, ..., with %Eigen objects, or with classes containing %Eigen objects, like this, + +\code +std::vector my_vector; +struct my_class { ... Eigen::Matrix2f m; ... }; +std::map my_map; +\endcode + +then you need to read this separate page: \ref TopicStlContainers "Using STL Containers with Eigen". 
+ +Note that here, Eigen::Matrix2f is only used as an example, more generally the issue arises for all \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types" and \ref TopicStructHavingEigenMembers "structures having such Eigen objects as members". + +The same issue will be exhibited by any classes/functions by-passing operator new to allocate memory, that is, by performing custom memory allocation followed by calls to the placement new operator. This is for instance typically the case of \c std::make_shared or \c std::allocate_shared for which the solution is to use an \ref aligned_allocator "aligned allocator" as detailed in the \ref TopicStlContainers "solution for STL containers". + +\section c3 Cause 3: Passing Eigen objects by value + +If some function in your code is getting an Eigen object passed by value, like this, + +\code +void func(Eigen::Vector4d v); +\endcode + +then you need to read this separate page: \ref TopicPassingByValue "Passing Eigen objects by value to functions". + +Note that here, Eigen::Vector4d is only used as an example, more generally the issue arises for all \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types". + +\section c4 Cause 4: Compiler making a wrong assumption on stack alignment (for instance GCC on Windows) + +This is a must-read for people using GCC on Windows (like MinGW or TDM-GCC). If you have this assertion failure in an innocent function declaring a local variable like this: + +\code +void foo() +{ + Eigen::Quaternionf q; + //... +} +\endcode + +then you need to read this separate page: \ref TopicWrongStackAlignment "Compiler making a wrong assumption on stack alignment". + +Note that here, Eigen::Quaternionf is only used as an example, more generally the issue arises for all \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types". 
+ +\section explanation General explanation of this assertion + +\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions addressing them will crash. + +Eigen normally takes care of these alignment issues for you, by setting an alignment attribute on them and by overloading their "operator new". + +However there are a few corner cases where these alignment settings get overridden: they are the possible causes for this assertion. + +\section getrid I don't care about optimal vectorization, how do I get rid of that stuff? + +Three possibilities: +
    +
  • Use the \c DontAlign option to Matrix, Array, Quaternion, etc. objects that give you trouble. This way Eigen won't try to align them, and thus won't assume any special alignment. On the down side, you will pay the cost of unaligned loads/stores for them, but on modern CPUs, the overhead is either null or marginal. See \link StructHavingEigenMembers_othersolutions here \endlink for an example.
  • +
  • Define \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY \endlink. That disables all 16-byte (and above) static alignment code, while keeping 16-byte (or above) heap alignment. This has the effect of + vectorizing fixed-size objects (like Matrix4d) through unaligned stores (as controlled by \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink), while keeping unchanged the vectorization of dynamic-size objects + (like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of static alignment.
  • +
  • Or define both \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_VECTORIZE \endlink and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the + 16-byte alignment code and thus preserves ABI compatibility, but completely disables vectorization.
  • +
+ +If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 16-byte alignment and the assertion, here's the explanation: + +It doesn't disable the assertion, because otherwise code that runs fine without vectorization would suddenly crash when enabling vectorization. +It doesn't disable 16-byte alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path. + +*/ + +} diff --git a/thirdparty/eigen/doc/UsingBlasLapackBackends.dox b/thirdparty/eigen/doc/UsingBlasLapackBackends.dox new file mode 100644 index 000000000..caa597122 --- /dev/null +++ b/thirdparty/eigen/doc/UsingBlasLapackBackends.dox @@ -0,0 +1,133 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + Copyright (C) 2011-2016 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Documentation on the use of BLAS/LAPACK libraries through Eigen + ******************************************************************************** +*/ + +namespace Eigen { + +/** \page TopicUsingBlasLapack Using BLAS/LAPACK from %Eigen + + +Since %Eigen version 3.3 and later, any F77 compatible BLAS or LAPACK libraries can be used as backends for dense matrix products and dense matrix decompositions. +For instance, one can use Intel® MKL, Apple's Accelerate framework on OSX, OpenBLAS, Netlib LAPACK, etc. + +Do not miss this \link TopicUsingIntelMKL page \endlink for further discussions on the specific use of Intel® MKL (also includes VML, PARDISO, etc.) + +In order to use an external BLAS and/or LAPACK library, you must link your own application to the respective libraries and their dependencies. +For LAPACK, you must also link to the standard Lapacke library, which is used as a convenient thin layer between %Eigen's C++ code and LAPACK F77 interface. Then you must activate their usage by defining one or multiple of the following macros (\b before including any %Eigen's header): + +\note For Mac users, in order to use the lapack version shipped with the Accelerate framework, you also need the lapacke library. 
+Using MacPorts, this is as easy as: +\code +sudo port install lapack +\endcode +and then use the following link flags: \c -framework \c Accelerate \c /opt/local/lib/lapack/liblapacke.dylib + + + + + +
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines (compatible with any F77 BLAS interface)
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Lapacke C interface to Lapack (compatible with any F77 LAPACK interface)
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithms of lower numerical robustness are disabled. \n This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
+ +When doing so, a number of %Eigen's algorithms are silently substituted with calls to BLAS or LAPACK routines. +These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex, and \c complex. +Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. + +The breadth of %Eigen functionality that can be substituted is listed in the table below. + + + + + + + + + + +
Functional domainCode exampleBLAS/LAPACK routines
Matrix-matrix operations \n \c EIGEN_USE_BLAS \code +m1*m2.transpose(); +m1.selfadjointView()*m2; +m1*m2.triangularView(); +m1.selfadjointView().rankUpdate(m2,1.0); +\endcode\code +?gemm +?symm/?hemm +?trmm +dsyrk/ssyrk +\endcode
Matrix-vector operations \n \c EIGEN_USE_BLAS \code +m1.adjoint()*b; +m1.selfadjointView()*b; +m1.triangularView()*b; +\endcode\code +?gemv +?symv/?hemv +?trmv +\endcode
LU decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +v1 = m1.lu().solve(v2); +\endcode\code +?getrf +\endcode
Cholesky decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +v1 = m2.selfadjointView().llt().solve(v2); +\endcode\code +?potrf +\endcode
QR decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +m1.householderQr(); +m1.colPivHouseholderQr(); +\endcode\code +?geqrf +?geqp3 +\endcode
Singular value decomposition \n \c EIGEN_USE_LAPACKE \code +JacobiSVD svd; +svd.compute(m1, ComputeThinV); +\endcode\code +?gesvd +\endcode
Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +EigenSolver es(m1); +ComplexEigenSolver ces(m1); +SelfAdjointEigenSolver saes(m1+m1.transpose()); +GeneralizedSelfAdjointEigenSolver + gsaes(m1+m1.transpose(),m2+m2.transpose()); +\endcode\code +?gees +?gees +?syev/?heev +?syev/?heev, +?potrf +\endcode
Schur decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +RealSchur schurR(m1); +ComplexSchur schurC(m1); +\endcode\code +?gees +\endcode
+In the examples, m1 and m2 are dense matrices and v1 and v2 are dense vectors. + +*/ + +} diff --git a/thirdparty/eigen/doc/UsingIntelMKL.dox b/thirdparty/eigen/doc/UsingIntelMKL.dox new file mode 100644 index 000000000..a1a3a18f2 --- /dev/null +++ b/thirdparty/eigen/doc/UsingIntelMKL.dox @@ -0,0 +1,107 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + Copyright (C) 2011 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + ******************************************************************************** + * Content : Documentation on the use of Intel MKL through Eigen + ******************************************************************************** +*/ + +namespace Eigen { + +/** \page TopicUsingIntelMKL Using Intel® MKL from %Eigen + + + +Since %Eigen version 3.1 and later, users can benefit from built-in Intel® Math Kernel Library (MKL) optimizations with an installed copy of Intel MKL 10.3 (or later). + + Intel MKL provides highly optimized multi-threaded mathematical routines for x86-compatible architectures. +Intel MKL is available on Linux, Mac and Windows for both Intel64 and IA32 architectures. + +\note +Intel® MKL is a proprietary software and it is the responsibility of users to buy or register for community (free) Intel MKL licenses for their products. Moreover, the license of the user product has to allow linking to proprietary software that excludes any unmodified versions of the GPL. + +Using Intel MKL through %Eigen is easy: +-# define the \c EIGEN_USE_MKL_ALL macro before including any %Eigen's header +-# link your program to MKL libraries (see the MKL linking advisor) +-# on a 64bits system, you must use the LP64 interface (not the ILP64 one) + +When doing so, a number of %Eigen's algorithms are silently substituted with calls to Intel MKL routines. +These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex, and \c complex. +Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. + +In addition you can choose which parts will be substituted by defining one or multiple of the following macros: + + + + + + + +
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Lapacke C interface to Lapack
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithms of lower robustness are disabled. \n This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
\c EIGEN_USE_MKL_VML Enables the use of Intel VML (vector operations)
\c EIGEN_USE_MKL_ALL Defines \c EIGEN_USE_BLAS, \c EIGEN_USE_LAPACKE, and \c EIGEN_USE_MKL_VML
+ +Note that the BLAS and LAPACKE backends can be enabled for any F77 compatible BLAS and LAPACK libraries. See this \link TopicUsingBlasLapack page \endlink for the details. + +Finally, the PARDISO sparse solver shipped with Intel MKL can be used through the \ref PardisoLU, \ref PardisoLLT and \ref PardisoLDLT classes of the \ref PardisoSupport_Module. + +The following table summarizes the list of functions covered by \c EIGEN_USE_MKL_VML: + + + +
Code exampleMKL routines
\code +v2=v1.array().sin(); +v2=v1.array().asin(); +v2=v1.array().cos(); +v2=v1.array().acos(); +v2=v1.array().tan(); +v2=v1.array().exp(); +v2=v1.array().log(); +v2=v1.array().sqrt(); +v2=v1.array().square(); +v2=v1.array().pow(1.5); +\endcode\code +v?Sin +v?Asin +v?Cos +v?Acos +v?Tan +v?Exp +v?Ln +v?Sqrt +v?Sqr +v?Powx +\endcode
+In the examples, v1 and v2 are dense vectors. + + +\section TopicUsingIntelMKL_Links Links +- Intel MKL can be purchased and downloaded here. +- Intel MKL is also bundled with Intel Composer XE. + + +*/ + +} diff --git a/thirdparty/eigen/doc/UsingNVCC.dox b/thirdparty/eigen/doc/UsingNVCC.dox new file mode 100644 index 000000000..f8e755b79 --- /dev/null +++ b/thirdparty/eigen/doc/UsingNVCC.dox @@ -0,0 +1,32 @@ + +namespace Eigen { + +/** \page TopicCUDA Using Eigen in CUDA kernels + +\b Disclaimer: this page is about an \b experimental feature in %Eigen. + +Starting from CUDA 5.0, the CUDA compiler, \c nvcc, is able to properly parse %Eigen's code (almost). +A few adaptations of %Eigen's code already allow you to use some parts of %Eigen in your own CUDA kernels. +To this end you need the devel branch of %Eigen, CUDA 5.0 or greater with GCC. + +Known issues: + + - \c nvcc with MS Visual Studio does not work (patch welcome) + + - \c nvcc with \c clang does not work (patch welcome) + + - \c nvcc 5.5 with gcc-4.7 (or greater) has issues with the standard \c \ header file. To work around this, you can add the following before including any other files: + \code + // workaround issue between gcc >= 4.7 and cuda 5.5 + #if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7) + #undef _GLIBCXX_ATOMIC_BUILTINS + #undef _GLIBCXX_USE_INT128 + #endif + \endcode + + - On 64-bit systems Eigen uses \c long \c int as the default type for indexes and sizes. On CUDA devices, it would make sense to default to 32-bit \c int. + However, to keep host and CUDA code compatible, this cannot be done automatically by %Eigen, and the user is thus required to define \c EIGEN_DEFAULT_DENSE_INDEX_TYPE to \c int throughout his code (or only for CUDA code if there is no interaction between host and CUDA code through %Eigen's object). 
+ +*/ + +} diff --git a/thirdparty/eigen/doc/WrongStackAlignment.dox b/thirdparty/eigen/doc/WrongStackAlignment.dox new file mode 100644 index 000000000..17d5513a7 --- /dev/null +++ b/thirdparty/eigen/doc/WrongStackAlignment.dox @@ -0,0 +1,56 @@ +namespace Eigen { + +/** \eigenManualPage TopicWrongStackAlignment Compiler making a wrong assumption on stack alignment + +

It appears that this was a GCC bug that has been fixed in GCC 4.5. +If you hit this issue, please upgrade to GCC 4.5 and report to us, so we can update this page.

+ +This is an issue that, so far, we met only with GCC on Windows: for instance, MinGW and TDM-GCC. + +By default, in a function like this, + +\code +void foo() +{ + Eigen::Quaternionf q; + //... +} +\endcode + +GCC assumes that the stack is already 16-byte-aligned so that the object \a q will be created at a 16-byte-aligned location. For this reason, it doesn't take any special care to explicitly align the object \a q, as Eigen requires. + +The problem is that, in some particular cases, this assumption can be wrong on Windows, where the stack is only guaranteed to have 4-byte alignment. Indeed, even though GCC takes care of aligning the stack in the main function and does its best to keep it aligned, when a function is called from another thread or from a binary compiled with another compiler, the stack alignment can be corrupted. This results in the object 'q' being created at an unaligned location, making your program crash with the \ref TopicUnalignedArrayAssert "assertion on unaligned arrays". So far we found the three following solutions. + + +\section sec_sol1 Local solution + +A local solution is to mark such a function with this attribute: +\code +__attribute__((force_align_arg_pointer)) void foo() +{ + Eigen::Quaternionf q; + //... +} +\endcode +Read this GCC documentation to understand what this does. Of course this should only be done on GCC on Windows, so for portability you'll have to encapsulate this in a macro which you leave empty on other platforms. The advantage of this solution is that you can finely select which function might have a corrupted stack alignment. Of course on the downside this has to be done for every such function, so you may prefer one of the following two global solutions. 
+ + +\section sec_sol2 Global solutions + +A global solution is to edit your project so that when compiling with GCC on Windows, you pass this option to GCC: +\code +-mincoming-stack-boundary=2 +\endcode +Explanation: this tells GCC that the stack is only required to be aligned to 2^2=4 bytes, so that GCC now knows that it really must take extra care to honor the 16 byte alignment of \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types" when needed. + +Another global solution is to pass this option to gcc: +\code +-mstackrealign +\endcode +which has the same effect than adding the \c force_align_arg_pointer attribute to all functions. + +These global solutions are easy to use, but note that they may slowdown your program because they lead to extra prologue/epilogue instructions for every function. + +*/ + +} diff --git a/thirdparty/eigen/doc/eigen_navtree_hacks.js b/thirdparty/eigen/doc/eigen_navtree_hacks.js new file mode 100644 index 000000000..bd7e02b38 --- /dev/null +++ b/thirdparty/eigen/doc/eigen_navtree_hacks.js @@ -0,0 +1,240 @@ + +// generate a table of contents in the side-nav based on the h1/h2 tags of the current page. +function generate_autotoc() { + var headers = $("h1, h2"); + if(headers.length > 1) { + var toc = $("#side-nav").append(''); + toc = $("#nav-toc"); + var footerHeight = footer.height(); + toc = toc.append('
    '); + toc = toc.find('ul'); + var indices = new Array(); + indices[0] = 0; + indices[1] = 0; + + var h1counts = $("h1").length; + headers.each(function(i) { + var current = $(this); + var levelTag = current[0].tagName.charAt(1); + if(h1counts==0) + levelTag--; + var cur_id = current.attr("id"); + + indices[levelTag-1]+=1; + var prefix = indices[0]; + if (levelTag >1) { + prefix+="."+indices[1]; + } + + // Uncomment to add number prefixes + // current.html(prefix + " " + current.html()); + for(var l = levelTag; l < 2; ++l){ + indices[l] = 0; + } + + if(cur_id == undefined) { + current.attr('id', 'title' + i); + current.addClass('anchor'); + toc.append("
  • " + current.text() + "
  • "); + } else { + toc.append("
  • " + current.text() + "
  • "); + } + }); + resizeHeight(); + } +} + + +var global_navtree_object; + +// Overloaded to remove links to sections/subsections +function getNode(o, po) +{ + po.childrenVisited = true; + var l = po.childrenData.length-1; + for (var i in po.childrenData) { + var nodeData = po.childrenData[i]; + if((!nodeData[1]) || (nodeData[1].indexOf('#')==-1)) // <- we added this line + po.children[i] = newNode(o, po, nodeData[0], nodeData[1], nodeData[2], i==l); + } +} + +// Overloaded to adjust the size of the navtree wrt the toc +function resizeHeight() +{ + var toc = $("#nav-toc"); + var tocHeight = toc.height(); // <- we added this line + var headerHeight = header.height(); + var footerHeight = footer.height(); + var windowHeight = $(window).height() - headerHeight - footerHeight; + content.css({height:windowHeight + "px"}); + navtree.css({height:(windowHeight-tocHeight) + "px"}); // <- we modified this line + sidenav.css({height:(windowHeight) + "px",top: headerHeight+"px"}); +} + +// Overloaded to save the root node into global_navtree_object +function initNavTree(toroot,relpath) +{ + var o = new Object(); + global_navtree_object = o; // <- we added this line + o.toroot = toroot; + o.node = new Object(); + o.node.li = document.getElementById("nav-tree-contents"); + o.node.childrenData = NAVTREE; + o.node.children = new Array(); + o.node.childrenUL = document.createElement("ul"); + o.node.getChildrenUL = function() { return o.node.childrenUL; }; + o.node.li.appendChild(o.node.childrenUL); + o.node.depth = 0; + o.node.relpath = relpath; + o.node.expanded = false; + o.node.isLast = true; + o.node.plus_img = document.createElement("img"); + o.node.plus_img.src = relpath+"ftv2pnode.png"; + o.node.plus_img.width = 16; + o.node.plus_img.height = 22; + + if (localStorageSupported()) { + var navSync = $('#nav-sync'); + if (cachedLink()) { + showSyncOff(navSync,relpath); + navSync.removeClass('sync'); + } else { + showSyncOn(navSync,relpath); + } + navSync.click(function(){ 
toggleSyncButton(relpath); }); + } + + navTo(o,toroot,window.location.hash,relpath); + + $(window).bind('hashchange', function(){ + if (window.location.hash && window.location.hash.length>1){ + var a; + if ($(location).attr('hash')){ + var clslink=stripPath($(location).attr('pathname'))+':'+ + $(location).attr('hash').substring(1); + a=$('.item a[class$="'+clslink+'"]'); + } + if (a==null || !$(a).parent().parent().hasClass('selected')){ + $('.item').removeClass('selected'); + $('.item').removeAttr('id'); + } + var link=stripPath2($(location).attr('pathname')); + navTo(o,link,$(location).attr('hash'),relpath); + } else if (!animationInProgress) { + $('#doc-content').scrollTop(0); + $('.item').removeClass('selected'); + $('.item').removeAttr('id'); + navTo(o,toroot,window.location.hash,relpath); + } + }) + + $(window).load(showRoot); +} + +// return false if the the node has no children at all, or has only section/subsection children +function checkChildrenData(node) { + if (!(typeof(node.childrenData)==='string')) { + for (var i in node.childrenData) { + var url = node.childrenData[i][1]; + if(url.indexOf("#")==-1) + return true; + } + return false; + } + return (node.childrenData); +} + +// Modified to: +// 1 - remove the root node +// 2 - remove the section/subsection children +function createIndent(o,domNode,node,level) +{ + var level=-2; // <- we replaced level=-1 by level=-2 + var n = node; + while (n.parentNode) { level++; n=n.parentNode; } + var imgNode = document.createElement("img"); + imgNode.style.paddingLeft=(16*(level)).toString()+'px'; + imgNode.width = 16; + imgNode.height = 22; + imgNode.border = 0; + if (checkChildrenData(node)) { // <- we modified this line to use checkChildrenData(node) instead of node.childrenData + node.plus_img = imgNode; + node.expandToggle = document.createElement("a"); + node.expandToggle.href = "javascript:void(0)"; + node.expandToggle.onclick = function() { + if (node.expanded) { + $(node.getChildrenUL()).slideUp("fast"); 
+ node.plus_img.src = node.relpath+"ftv2pnode.png"; + node.expanded = false; + } else { + expandNode(o, node, false, false); + } + } + node.expandToggle.appendChild(imgNode); + domNode.appendChild(node.expandToggle); + imgNode.src = node.relpath+"ftv2pnode.png"; + } else { + imgNode.src = node.relpath+"ftv2node.png"; + domNode.appendChild(imgNode); + } +} + +// Overloaded to automatically expand the selected node +function selectAndHighlight(hash,n) +{ + var a; + if (hash) { + var link=stripPath($(location).attr('pathname'))+':'+hash.substring(1); + a=$('.item a[class$="'+link+'"]'); + } + if (a && a.length) { + a.parent().parent().addClass('selected'); + a.parent().parent().attr('id','selected'); + highlightAnchor(); + } else if (n) { + $(n.itemDiv).addClass('selected'); + $(n.itemDiv).attr('id','selected'); + } + if ($('#nav-tree-contents .item:first').hasClass('selected')) { + $('#nav-sync').css('top','30px'); + } else { + $('#nav-sync').css('top','5px'); + } + expandNode(global_navtree_object, n, true, true); // <- we added this line + showRoot(); +} + + +$(document).ready(function() { + + generate_autotoc(); + + (function (){ // wait until the first "selected" element has been created + try { + + // this line will triger an exception if there is no #selected element, i.e., before the tree structure is complete. + document.getElementById("selected").className = "item selected"; + + // ok, the default tree has been created, we can keep going... 
+ + // expand the "Chapters" node + if(window.location.href.indexOf('unsupported')==-1) + expandNode(global_navtree_object, global_navtree_object.node.children[0].children[2], true, true); + else + expandNode(global_navtree_object, global_navtree_object.node.children[0].children[1], true, true); + + // Hide the root node "Eigen" + $(document.getElementsByClassName('index.html')[0]).parent().parent().css({display:"none"}); + + } catch (err) { + setTimeout(arguments.callee, 10); + } + })(); +}); + +$(window).load(function() { + resizeHeight(); +}); diff --git a/thirdparty/eigen/doc/eigendoxy.css b/thirdparty/eigen/doc/eigendoxy.css new file mode 100644 index 000000000..6274e6c70 --- /dev/null +++ b/thirdparty/eigen/doc/eigendoxy.css @@ -0,0 +1,216 @@ + +/******** Eigen specific CSS code ************/ + +/**** Styles removing elements ****/ + +/* remove the "modules|classes" link for module pages (they are already in the TOC) */ +div.summary { + display:none; +} + +/* remove */ +div.contents hr { + display:none; +} + +/**** ****/ + +p, dl.warning, dl.attention, dl.note +{ + max-width:60em; + text-align:justify; +} + +li { + max-width:55em; + text-align:justify; +} + +img { + border: 0; +} + +div.fragment { + display:table; /* this allows the element to be larger than its parent */ + padding: 0pt; +} +pre.fragment { + border: 1px solid #cccccc; + + margin: 2px 0px 2px 0px; + padding: 3px 5px 3px 5px; +} + + + +/* Common style for all Eigen's tables */ + +table.example, table.manual, table.manual-vl, table.manual-hl { + max-width:100%; + border-collapse: collapse; + border-style: solid; + border-width: 1px; + border-color: #cccccc; + font-size: 1em; + + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -moz-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +table.example th, table.manual th, table.manual-vl th, table.manual-hl th { + padding: 0.5em 0.5em 0.5em 0.5em; + text-align: left; + padding-right: 1em; + color: 
#555555; + background-color: #F4F4E5; + + background-image: -webkit-gradient(linear,center top,center bottom,from(#FFFFFF), color-stop(0.3,#FFFFFF), color-stop(0.30,#FFFFFF), color-stop(0.98,#F4F4E5), to(#ECECDE)); + background-image: -moz-linear-gradient(center top, #FFFFFF 0%, #FFFFFF 30%, #F4F4E5 98%, #ECECDE); + filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#FFFFFF', endColorstr='#F4F4E5'); +} + +table.example td, table.manual td, table.manual-vl td, table.manual-hl td { + vertical-align:top; + border-width: 1px; + border-color: #cccccc; +} + +/* header of headers */ +table th.meta { + text-align:center; + font-size: 1.2em; + background-color:#FFFFFF; +} + +/* intermediate header */ +table th.inter { + text-align:left; + background-color:#FFFFFF; + background-image:none; + border-style:solid solid solid solid; + border-width: 1px; + border-color: #cccccc; +} + +/** class for exemple / output tables **/ + +table.example { +} + +table.example th { +} + +table.example td { + padding: 0.5em 0.5em 0.5em 0.5em; + vertical-align:top; +} + +/* standard class for the manual */ + +table.manual, table.manual-vl, table.manual-hl { + padding: 0.2em 0em 0.5em 0em; +} + +table.manual th, table.manual-vl th, table.manual-hl th { + margin: 0em 0em 0.3em 0em; +} + +table.manual td, table.manual-vl td, table.manual-hl td { + padding: 0.3em 0.5em 0.3em 0.5em; + vertical-align:top; + border-width: 1px; +} + +table.manual td.alt, table.manual tr.alt, table.manual-vl td.alt, table.manual-vl tr.alt { + background-color: #F4F4E5; +} + +table.manual-vl th, table.manual-vl td, table.manual-vl td.alt { + border-color: #cccccc; + border-width: 1px; + border-style: none solid none solid; +} + +table.manual-vl th.inter { + border-style: solid solid solid solid; +} + +table.manual-hl td { + border-color: #cccccc; + border-width: 1px; + border-style: solid none solid none; +} + +table td.code { + font-family: monospace; +} + +h2 { + margin-top:2em; + border-style: none none 
solid none; + border-width: 1px; + border-color: #cccccc; +} + +/**** Table of content in the side-nav ****/ + + +div.toc { + margin:0; + padding: 0.3em 0 0 0; + width:100%; + float:none; + position:absolute; + bottom:0; + border-radius:0px; + border-style: solid none none none; +} + +div.toc h3 { + margin-left: 0.5em; + margin-bottom: 0.2em; +} + +div.toc ul { + margin: 0.2em 0 0.4em 0.5em; +} + +span.cpp11,span.cpp14,span.cpp17 { + color: #119911; + font-weight: bold; +} + +/**** old Eigen's styles ****/ + + +table.tutorial_code td { + border-color: transparent; /* required for Firefox */ + padding: 3pt 5pt 3pt 5pt; + vertical-align: top; +} + + +/* Whenever doxygen meets a '\n' or a '
    ', it will put + * the text containing the character into a

    . + * This little hack together with table.tutorial_code td.note + * aims at fixing this issue. */ +table.tutorial_code td.note p.starttd { + margin: 0px; + border: none; + padding: 0px; +} + +div.eimainmenu { + text-align: center; +} + +/* center version number on main page */ +h3.version { + text-align: center; +} + + +td.width20em p.endtd { + width: 20em; +} diff --git a/thirdparty/eigen/doc/eigendoxy_footer.html.in b/thirdparty/eigen/doc/eigendoxy_footer.html.in new file mode 100644 index 000000000..878244a19 --- /dev/null +++ b/thirdparty/eigen/doc/eigendoxy_footer.html.in @@ -0,0 +1,36 @@ + + +

    + + + + + + + + + + + + + diff --git a/thirdparty/eigen/doc/eigendoxy_header.html.in b/thirdparty/eigen/doc/eigendoxy_header.html.in new file mode 100644 index 000000000..0f3859f40 --- /dev/null +++ b/thirdparty/eigen/doc/eigendoxy_header.html.in @@ -0,0 +1,61 @@ + + + + + + +$projectname: $title +$title + + + +$treeview +$search +$mathjax + + + + + + + + + +
    + + + +
    + + + + + + + + + + + + + + + + + + + + + +
    +
    $projectname +  $projectnumber +
    +
    $projectbrief
    +
    +
    $projectbrief
    +
    $searchbox
    +
    + + + diff --git a/thirdparty/eigen/doc/eigendoxy_layout.xml.in b/thirdparty/eigen/doc/eigendoxy_layout.xml.in new file mode 100644 index 000000000..c14b621e5 --- /dev/null +++ b/thirdparty/eigen/doc/eigendoxy_layout.xml.in @@ -0,0 +1,178 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/thirdparty/eigen/doc/eigendoxy_tabs.css b/thirdparty/eigen/doc/eigendoxy_tabs.css new file mode 100644 index 000000000..21920562a --- /dev/null +++ b/thirdparty/eigen/doc/eigendoxy_tabs.css @@ -0,0 +1,59 @@ +.tabs, .tabs2, .tabs3 { + background-image: url('tab_b.png'); + width: 100%; + z-index: 101; + font-size: 13px; +} + +.tabs2 { + font-size: 10px; +} +.tabs3 { + font-size: 9px; +} + +.tablist { + margin: 0; + padding: 0; + display: table; +} + +.tablist li { + float: left; + display: table-cell; + background-image: url('tab_b.png'); + line-height: 36px; + list-style: none; +} + +.tablist a { + display: block; + padding: 0 20px; + font-weight: bold; + background-image:url('tab_s.png'); + background-repeat:no-repeat; + background-position:right; + color: #283A5D; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + text-decoration: none; + outline: none; +} + +.tabs3 .tablist a { + padding: 0 10px; +} + +.tablist a:hover { + background-image: url('tab_h.png'); + background-repeat:repeat-x; + color: #fff; + text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0); + text-decoration: none; +} + +.tablist li.current a { + background-image: url('tab_a.png'); + background-repeat:repeat-x; + color: #fff; + text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0); +} diff --git a/thirdparty/eigen/doc/examples/.krazy b/thirdparty/eigen/doc/examples/.krazy new file mode 100644 index 
000000000..00b99405d --- /dev/null +++ b/thirdparty/eigen/doc/examples/.krazy @@ -0,0 +1,2 @@ +EXCLUDE copyright +EXCLUDE license diff --git a/thirdparty/eigen/doc/examples/CMakeLists.txt b/thirdparty/eigen/doc/examples/CMakeLists.txt new file mode 100644 index 000000000..f7a19055f --- /dev/null +++ b/thirdparty/eigen/doc/examples/CMakeLists.txt @@ -0,0 +1,21 @@ +file(GLOB examples_SRCS "*.cpp") + +foreach(example_src ${examples_SRCS}) + get_filename_component(example ${example_src} NAME_WE) + add_executable(${example} ${example_src}) + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${example} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() + add_custom_command( + TARGET ${example} + POST_BUILD + COMMAND ${example} + ARGS >${CMAKE_CURRENT_BINARY_DIR}/${example}.out + ) + add_dependencies(all_examples ${example}) +endforeach(example_src) + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) +ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11") +endif() \ No newline at end of file diff --git a/thirdparty/eigen/doc/examples/CustomizingEigen_Inheritance.cpp b/thirdparty/eigen/doc/examples/CustomizingEigen_Inheritance.cpp new file mode 100644 index 000000000..48df64ee3 --- /dev/null +++ b/thirdparty/eigen/doc/examples/CustomizingEigen_Inheritance.cpp @@ -0,0 +1,30 @@ +#include +#include + +class MyVectorType : public Eigen::VectorXd +{ +public: + MyVectorType(void):Eigen::VectorXd() {} + + // This constructor allows you to construct MyVectorType from Eigen expressions + template + MyVectorType(const Eigen::MatrixBase& other) + : Eigen::VectorXd(other) + { } + + // This method allows you to assign Eigen expressions to MyVectorType + template + MyVectorType& operator=(const Eigen::MatrixBase & other) + { + this->Eigen::VectorXd::operator=(other); + return *this; + } +}; + +int main() +{ + MyVectorType v = MyVectorType::Ones(4); + v(2) += 10; + v = 2 * v; + std::cout << v.transpose() << 
std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Cwise_erf.cpp b/thirdparty/eigen/doc/examples/Cwise_erf.cpp new file mode 100644 index 000000000..e7cd2c1c0 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Cwise_erf.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(-0.5,2,0,-7); + std::cout << v.erf() << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Cwise_erfc.cpp b/thirdparty/eigen/doc/examples/Cwise_erfc.cpp new file mode 100644 index 000000000..d8bb04c30 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Cwise_erfc.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(-0.5,2,0,-7); + std::cout << v.erfc() << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Cwise_lgamma.cpp b/thirdparty/eigen/doc/examples/Cwise_lgamma.cpp new file mode 100644 index 000000000..f1c4f503e --- /dev/null +++ b/thirdparty/eigen/doc/examples/Cwise_lgamma.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(0.5,10,0,-1); + std::cout << v.lgamma() << std::endl; +} \ No newline at end of file diff --git a/thirdparty/eigen/doc/examples/DenseBase_middleCols_int.cpp b/thirdparty/eigen/doc/examples/DenseBase_middleCols_int.cpp new file mode 100644 index 000000000..0ebd955ec --- /dev/null +++ b/thirdparty/eigen/doc/examples/DenseBase_middleCols_int.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main(void) +{ + int const N = 5; + MatrixXi A(N,N); + A.setRandom(); + cout << "A =\n" << A << '\n' << endl; + cout << "A(1..3,:) =\n" << A.middleCols(1,3) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/DenseBase_middleRows_int.cpp b/thirdparty/eigen/doc/examples/DenseBase_middleRows_int.cpp new file mode 100644 index 000000000..a6fe9e844 --- /dev/null +++ b/thirdparty/eigen/doc/examples/DenseBase_middleRows_int.cpp @@ -0,0 +1,15 @@ +#include +#include + +using 
namespace Eigen; +using namespace std; + +int main(void) +{ + int const N = 5; + MatrixXi A(N,N); + A.setRandom(); + cout << "A =\n" << A << '\n' << endl; + cout << "A(2..3,:) =\n" << A.middleRows(2,2) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/DenseBase_template_int_middleCols.cpp b/thirdparty/eigen/doc/examples/DenseBase_template_int_middleCols.cpp new file mode 100644 index 000000000..6191d79c8 --- /dev/null +++ b/thirdparty/eigen/doc/examples/DenseBase_template_int_middleCols.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main(void) +{ + int const N = 5; + MatrixXi A(N,N); + A.setRandom(); + cout << "A =\n" << A << '\n' << endl; + cout << "A(:,1..3) =\n" << A.middleCols<3>(1) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/DenseBase_template_int_middleRows.cpp b/thirdparty/eigen/doc/examples/DenseBase_template_int_middleRows.cpp new file mode 100644 index 000000000..7e8b6573f --- /dev/null +++ b/thirdparty/eigen/doc/examples/DenseBase_template_int_middleRows.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main(void) +{ + int const N = 5; + MatrixXi A(N,N); + A.setRandom(); + cout << "A =\n" << A << '\n' << endl; + cout << "A(1..3,:) =\n" << A.middleRows<3>(1) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/QuickStart_example.cpp b/thirdparty/eigen/doc/examples/QuickStart_example.cpp new file mode 100644 index 000000000..7238c0c43 --- /dev/null +++ b/thirdparty/eigen/doc/examples/QuickStart_example.cpp @@ -0,0 +1,14 @@ +#include +#include + +using Eigen::MatrixXd; + +int main() +{ + MatrixXd m(2,2); + m(0,0) = 3; + m(1,0) = 2.5; + m(0,1) = -1; + m(1,1) = m(1,0) + m(0,1); + std::cout << m << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/QuickStart_example2_dynamic.cpp b/thirdparty/eigen/doc/examples/QuickStart_example2_dynamic.cpp new file mode 100644 index 000000000..ff6746e21 --- /dev/null +++ 
b/thirdparty/eigen/doc/examples/QuickStart_example2_dynamic.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + MatrixXd m = MatrixXd::Random(3,3); + m = (m + MatrixXd::Constant(3,3,1.2)) * 50; + cout << "m =" << endl << m << endl; + VectorXd v(3); + v << 1, 2, 3; + cout << "m * v =" << endl << m * v << endl; +} diff --git a/thirdparty/eigen/doc/examples/QuickStart_example2_fixed.cpp b/thirdparty/eigen/doc/examples/QuickStart_example2_fixed.cpp new file mode 100644 index 000000000..d91175273 --- /dev/null +++ b/thirdparty/eigen/doc/examples/QuickStart_example2_fixed.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + Matrix3d m = Matrix3d::Random(); + m = (m + Matrix3d::Constant(1.2)) * 50; + cout << "m =" << endl << m << endl; + Vector3d v(1,2,3); + + cout << "m * v =" << endl << m * v << endl; +} diff --git a/thirdparty/eigen/doc/examples/TemplateKeyword_flexible.cpp b/thirdparty/eigen/doc/examples/TemplateKeyword_flexible.cpp new file mode 100644 index 000000000..9d85292dd --- /dev/null +++ b/thirdparty/eigen/doc/examples/TemplateKeyword_flexible.cpp @@ -0,0 +1,22 @@ +#include +#include + +using namespace Eigen; + +template +void copyUpperTriangularPart(MatrixBase& dst, const MatrixBase& src) +{ + /* Note the 'template' keywords in the following line! 
*/ + dst.template triangularView() = src.template triangularView(); +} + +int main() +{ + MatrixXi m1 = MatrixXi::Ones(5,5); + MatrixXi m2 = MatrixXi::Random(4,4); + std::cout << "m2 before copy:" << std::endl; + std::cout << m2 << std::endl << std::endl; + copyUpperTriangularPart(m2, m1.topLeftCorner(4,4)); + std::cout << "m2 after copy:" << std::endl; + std::cout << m2 << std::endl << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/TemplateKeyword_simple.cpp b/thirdparty/eigen/doc/examples/TemplateKeyword_simple.cpp new file mode 100644 index 000000000..6998c1769 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TemplateKeyword_simple.cpp @@ -0,0 +1,20 @@ +#include +#include + +using namespace Eigen; + +void copyUpperTriangularPart(MatrixXf& dst, const MatrixXf& src) +{ + dst.triangularView() = src.triangularView(); +} + +int main() +{ + MatrixXf m1 = MatrixXf::Ones(4,4); + MatrixXf m2 = MatrixXf::Random(4,4); + std::cout << "m2 before copy:" << std::endl; + std::cout << m2 << std::endl << std::endl; + copyUpperTriangularPart(m2, m1); + std::cout << "m2 after copy:" << std::endl; + std::cout << m2 << std::endl << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialInplaceLU.cpp b/thirdparty/eigen/doc/examples/TutorialInplaceLU.cpp new file mode 100644 index 000000000..cb9c59b60 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialInplaceLU.cpp @@ -0,0 +1,61 @@ +#include +struct init { + init() { std::cout << "[" << "init" << "]" << std::endl; } +}; +init init_obj; +// [init] +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + MatrixXd A(2,2); + A << 2, -1, 1, 3; + cout << "Here is the input matrix A before decomposition:\n" << A << endl; +cout << "[init]" << endl; + +cout << "[declaration]" << endl; + PartialPivLU > lu(A); + cout << "Here is the input matrix A after decomposition:\n" << A << endl; +cout << "[declaration]" << endl; + +cout << "[matrixLU]" << endl; + cout << "Here is the matrix storing 
the L and U factors:\n" << lu.matrixLU() << endl; +cout << "[matrixLU]" << endl; + +cout << "[solve]" << endl; + MatrixXd A0(2,2); A0 << 2, -1, 1, 3; + VectorXd b(2); b << 1, 2; + VectorXd x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[solve]" << endl; + +cout << "[modifyA]" << endl; + A << 3, 4, -2, 1; + x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[modifyA]" << endl; + +cout << "[recompute]" << endl; + A0 = A; // save A + lu.compute(A); + x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[recompute]" << endl; + +cout << "[recompute_bis0]" << endl; + MatrixXd A1(2,2); + A1 << 5,-2,3,4; + lu.compute(A1); + cout << "Here is the input matrix A1 after decomposition:\n" << A1 << endl; +cout << "[recompute_bis0]" << endl; + +cout << "[recompute_bis1]" << endl; + x = lu.solve(b); + cout << "Residual: " << (A1 * x - b).norm() << endl; +cout << "[recompute_bis1]" << endl; + +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp new file mode 100644 index 000000000..06ba6461a --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgComputeTwice.cpp @@ -0,0 +1,23 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix2f A, b; + LLT llt; + A << 2, -1, -1, 3; + b << 1, 2, 3, 1; + cout << "Here is the matrix A:\n" << A << endl; + cout << "Here is the right hand side b:\n" << b << endl; + cout << "Computing LLT decomposition..." << endl; + llt.compute(A); + cout << "The solution is:\n" << llt.solve(b) << endl; + A(1,1)++; + cout << "The matrix A is now:\n" << A << endl; + cout << "Computing LLT decomposition..." 
<< endl; + llt.compute(A); + cout << "The solution is now:\n" << llt.solve(b) << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp new file mode 100644 index 000000000..f362fb71a --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgExComputeSolveError.cpp @@ -0,0 +1,14 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + MatrixXd A = MatrixXd::Random(100,100); + MatrixXd b = MatrixXd::Random(100,50); + MatrixXd x = A.fullPivLu().solve(b); + double relative_error = (A*x - b).norm() / b.norm(); // norm() is L2 norm + cout << "The relative error is:\n" << relative_error << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp new file mode 100644 index 000000000..3a99a94d7 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgExSolveColPivHouseholderQR.cpp @@ -0,0 +1,17 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix3f A; + Vector3f b; + A << 1,2,3, 4,5,6, 7,8,10; + b << 3, 3, 4; + cout << "Here is the matrix A:\n" << A << endl; + cout << "Here is the vector b:\n" << b << endl; + Vector3f x = A.colPivHouseholderQr().solve(b); + cout << "The solution is:\n" << x << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp new file mode 100644 index 000000000..f8beacd27 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgExSolveLDLT.cpp @@ -0,0 +1,16 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix2f A, b; + A << 2, -1, -1, 3; + b << 1, 2, 3, 1; + cout << "Here is the matrix A:\n" << A << endl; + cout << "Here is the right hand side b:\n" << b << endl; + Matrix2f x = A.ldlt().solve(b); + cout << 
"The solution is:\n" << x << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp new file mode 100644 index 000000000..14dde5b35 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgInverseDeterminant.cpp @@ -0,0 +1,16 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix3f A; + A << 1, 2, 1, + 2, 1, 0, + -1, 1, 2; + cout << "Here is the matrix A:\n" << A << endl; + cout << "The determinant of A is " << A.determinant() << endl; + cout << "The inverse of A is:\n" << A.inverse() << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp new file mode 100644 index 000000000..c5165077f --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgRankRevealing.cpp @@ -0,0 +1,20 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix3f A; + A << 1, 2, 5, + 2, 1, 4, + 3, 0, 3; + cout << "Here is the matrix A:\n" << A << endl; + FullPivLU lu_decomp(A); + cout << "The rank of A is " << lu_decomp.rank() << endl; + cout << "Here is a matrix whose columns form a basis of the null-space of A:\n" + << lu_decomp.kernel() << endl; + cout << "Here is a matrix whose columns form a basis of the column-space of A:\n" + << lu_decomp.image(A) << endl; // yes, have to pass the original A +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp new file mode 100644 index 000000000..9fbc031de --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + MatrixXf A = MatrixXf::Random(3, 2); + cout << "Here is the matrix A:\n" << A << endl; + VectorXf b = VectorXf::Random(3); + cout << "Here is the right hand 
side b:\n" << b << endl; + cout << "The least-squares solution is:\n" + << A.jacobiSvd(ComputeThinU | ComputeThinV).solve(b) << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp new file mode 100644 index 000000000..8d1d1ed65 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgSelfAdjointEigenSolver.cpp @@ -0,0 +1,18 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix2f A; + A << 1, 2, 2, 3; + cout << "Here is the matrix A:\n" << A << endl; + SelfAdjointEigenSolver eigensolver(A); + if (eigensolver.info() != Success) abort(); + cout << "The eigenvalues of A are:\n" << eigensolver.eigenvalues() << endl; + cout << "Here's a matrix whose columns are eigenvectors of A \n" + << "corresponding to these eigenvalues:\n" + << eigensolver.eigenvectors() << endl; +} diff --git a/thirdparty/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp b/thirdparty/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp new file mode 100644 index 000000000..3956b13a3 --- /dev/null +++ b/thirdparty/eigen/doc/examples/TutorialLinAlgSetThreshold.cpp @@ -0,0 +1,16 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix2d A; + A << 2, 1, + 2, 0.9999999999; + FullPivLU lu(A); + cout << "By default, the rank of A is found to be " << lu.rank() << endl; + lu.setThreshold(1e-5); + cout << "With threshold 1e-5, the rank of A is found to be " << lu.rank() << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp new file mode 100644 index 000000000..dc720ff58 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_accessors.cpp @@ -0,0 +1,24 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + ArrayXXf m(2,2); + + // assign some values coefficient by 
coefficient + m(0,0) = 1.0; m(0,1) = 2.0; + m(1,0) = 3.0; m(1,1) = m(0,1) + m(1,0); + + // print values to standard output + cout << m << endl << endl; + + // using the comma-initializer is also allowed + m << 1.0,2.0, + 3.0,4.0; + + // print values to standard output + cout << m << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp new file mode 100644 index 000000000..480ffb00f --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_addition.cpp @@ -0,0 +1,23 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + ArrayXXf a(3,3); + ArrayXXf b(3,3); + a << 1,2,3, + 4,5,6, + 7,8,9; + b << 1,2,3, + 1,2,3, + 1,2,3; + + // Adding two arrays + cout << "a + b = " << endl << a + b << endl << endl; + + // Subtracting a scalar from an array + cout << "a - 2 = " << endl << a - 2 << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp new file mode 100644 index 000000000..d9046c63d --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_cwise_other.cpp @@ -0,0 +1,19 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + ArrayXf a = ArrayXf::Random(5); + a *= 2; + cout << "a =" << endl + << a << endl; + cout << "a.abs() =" << endl + << a.abs() << endl; + cout << "a.abs().sqrt() =" << endl + << a.abs().sqrt() << endl; + cout << "a.min(a.abs().sqrt()) =" << endl + << a.min(a.abs().sqrt()) << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp new file mode 100644 index 000000000..371f07068 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop.cpp @@ -0,0 +1,22 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + MatrixXf m(2,2); 
+ MatrixXf n(2,2); + MatrixXf result(2,2); + + m << 1,2, + 3,4; + n << 5,6, + 7,8; + + result = (m.array() + 4).matrix() * m; + cout << "-- Combination 1: --" << endl << result << endl << endl; + result = (m.array() * n.array()).matrix() * m; + cout << "-- Combination 2: --" << endl << result << endl << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp new file mode 100644 index 000000000..101427511 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_interop_matrix.cpp @@ -0,0 +1,26 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + MatrixXf m(2,2); + MatrixXf n(2,2); + MatrixXf result(2,2); + + m << 1,2, + 3,4; + n << 5,6, + 7,8; + + result = m * n; + cout << "-- Matrix m*n: --" << endl << result << endl << endl; + result = m.array() * n.array(); + cout << "-- Array m*n: --" << endl << result << endl << endl; + result = m.cwiseProduct(n); + cout << "-- With cwiseProduct: --" << endl << result << endl << endl; + result = m.array() + 4; + cout << "-- Array m + 4: --" << endl << result << endl << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp new file mode 100644 index 000000000..6cb439ff7 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ArrayClass_mult.cpp @@ -0,0 +1,16 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + ArrayXXf a(2,2); + ArrayXXf b(2,2); + a << 1,2, + 3,4; + b << 5,6, + 7,8; + cout << "a * b = " << endl << a * b << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp new file mode 100644 index 000000000..76f49f2fb --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_block_assignment.cpp @@ -0,0 
+1,18 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Array22f m; + m << 1,2, + 3,4; + Array44f a = Array44f::Constant(0.6); + cout << "Here is the array a:" << endl << a << endl << endl; + a.block<2,2>(1,1) = m; + cout << "Here is now a with m copied into its central 2x2 block:" << endl << a << endl << endl; + a.block(0,0,2,3) = a.block(2,1,2,3); + cout << "Here is now a with bottom-right 2x3 block copied into top-left 2x2 block:" << endl << a << endl << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_colrow.cpp b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_colrow.cpp new file mode 100644 index 000000000..2e7eb009b --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_colrow.cpp @@ -0,0 +1,17 @@ +#include +#include + +using namespace std; + +int main() +{ + Eigen::MatrixXf m(3,3); + m << 1,2,3, + 4,5,6, + 7,8,9; + cout << "Here is the matrix m:" << endl << m << endl; + cout << "2nd Row: " << m.row(1) << endl; + m.col(2) += 3 * m.col(0); + cout << "After adding 3 times the first column into the third column, the matrix m is:\n"; + cout << m << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_corner.cpp b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_corner.cpp new file mode 100644 index 000000000..3a31507aa --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_corner.cpp @@ -0,0 +1,17 @@ +#include +#include + +using namespace std; + +int main() +{ + Eigen::Matrix4f m; + m << 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10,11,12, + 13,14,15,16; + cout << "m.leftCols(2) =" << endl << m.leftCols(2) << endl << endl; + cout << "m.bottomRows<2>() =" << endl << m.bottomRows<2>() << endl << endl; + m.topLeftCorner(1,3) = m.bottomRightCorner(3,1).transpose(); + cout << "After assignment, m = " << endl << m << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_print_block.cpp 
b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_print_block.cpp new file mode 100644 index 000000000..edea4aefe --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_print_block.cpp @@ -0,0 +1,20 @@ +#include +#include + +using namespace std; + +int main() +{ + Eigen::MatrixXf m(4,4); + m << 1, 2, 3, 4, + 5, 6, 7, 8, + 9,10,11,12, + 13,14,15,16; + cout << "Block in the middle" << endl; + cout << m.block<2,2>(1,1) << endl << endl; + for (int i = 1; i <= 3; ++i) + { + cout << "Block of size " << i << "x" << i << endl; + cout << m.block(0,0,i,i) << endl << endl; + } +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_vector.cpp b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_vector.cpp new file mode 100644 index 000000000..4a0b02342 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_BlockOperations_vector.cpp @@ -0,0 +1,14 @@ +#include +#include + +using namespace std; + +int main() +{ + Eigen::ArrayXf v(6); + v << 1, 2, 3, 4, 5, 6; + cout << "v.head(3) =" << endl << v.head(3) << endl << endl; + cout << "v.tail<3>() = " << endl << v.tail<3>() << endl << endl; + v.segment(1,4) *= 2; + cout << "after 'v.segment(1,4) *= 2', v =" << endl << v << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_PartialLU_solve.cpp b/thirdparty/eigen/doc/examples/Tutorial_PartialLU_solve.cpp new file mode 100644 index 000000000..a5608792f --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_PartialLU_solve.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Matrix3f A; + Vector3f b; + A << 1,2,3, 4,5,6, 7,8,10; + b << 3, 3, 4; + cout << "Here is the matrix A:" << endl << A << endl; + cout << "Here is the vector b:" << endl << b << endl; + Vector3f x = A.lu().solve(b); + cout << "The solution is:" << endl << x << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp 
b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp new file mode 100644 index 000000000..334b4d852 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_1nn.cpp @@ -0,0 +1,24 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Eigen::MatrixXf m(2,4); + Eigen::VectorXf v(2); + + m << 1, 23, 6, 9, + 3, 11, 7, 2; + + v << 2, + 3; + + MatrixXf::Index index; + // find nearest neighbour + (m.colwise() - v).colwise().squaredNorm().minCoeff(&index); + + cout << "Nearest neighbour is column " << index << ":" << endl; + cout << m.col(index) << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple.cpp new file mode 100644 index 000000000..e6c87c6a4 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple.cpp @@ -0,0 +1,21 @@ +#include +#include + +using namespace std; +int main() +{ + Eigen::MatrixXf mat(2,4); + Eigen::VectorXf v(2); + + mat << 1, 2, 6, 9, + 3, 1, 7, 2; + + v << 0, + 1; + + //add v to each column of m + mat.colwise() += v; + + std::cout << "Broadcasting result: " << std::endl; + std::cout << mat << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple_rowwise.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple_rowwise.cpp new file mode 100644 index 000000000..d87c96ab1 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_broadcast_simple_rowwise.cpp @@ -0,0 +1,20 @@ +#include +#include + +using namespace std; +int main() +{ + Eigen::MatrixXf mat(2,4); + Eigen::VectorXf v(4); + + mat << 1, 2, 6, 9, + 3, 1, 7, 2; + + v << 0,1,2,3; + + //add v to each row of m + mat.rowwise() += v.transpose(); + + std::cout << 
"Broadcasting result: " << std::endl; + std::cout << mat << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_colwise.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_colwise.cpp new file mode 100644 index 000000000..df6825663 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_colwise.cpp @@ -0,0 +1,13 @@ +#include +#include + +using namespace std; +int main() +{ + Eigen::MatrixXf mat(2,4); + mat << 1, 2, 6, 9, + 3, 1, 7, 2; + + std::cout << "Column's maximum: " << std::endl + << mat.colwise().maxCoeff() << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp new file mode 100644 index 000000000..049c747b0 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_maxnorm.cpp @@ -0,0 +1,20 @@ +#include +#include + +using namespace std; +using namespace Eigen; +int main() +{ + MatrixXf mat(2,4); + mat << 1, 2, 6, 9, + 3, 1, 7, 2; + + MatrixXf::Index maxIndex; + float maxNorm = mat.colwise().sum().maxCoeff(&maxIndex); + + std::cout << "Maximum sum at position " << maxIndex << std::endl; + + std::cout << "The corresponding vector is: " << std::endl; + std::cout << mat.col( maxIndex ) << std::endl; + std::cout << "And its sum is is: " << maxNorm << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp new file mode 100644 index 000000000..0cca37f36 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_bool.cpp @@ -0,0 +1,21 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + ArrayXXf a(2,2); + + a << 1,2, + 3,4; + + cout << "(a > 0).all() = " << (a > 
0).all() << endl; + cout << "(a > 0).any() = " << (a > 0).any() << endl; + cout << "(a > 0).count() = " << (a > 0).count() << endl; + cout << endl; + cout << "(a > 2).all() = " << (a > 2).all() << endl; + cout << "(a > 2).any() = " << (a > 2).any() << endl; + cout << "(a > 2).count() = " << (a > 2).count() << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp new file mode 100644 index 000000000..740439fb3 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.cpp @@ -0,0 +1,28 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + VectorXf v(2); + MatrixXf m(2,2), n(2,2); + + v << -1, + 2; + + m << 1,-2, + -3,4; + + cout << "v.squaredNorm() = " << v.squaredNorm() << endl; + cout << "v.norm() = " << v.norm() << endl; + cout << "v.lpNorm<1>() = " << v.lpNorm<1>() << endl; + cout << "v.lpNorm() = " << v.lpNorm() << endl; + + cout << endl; + cout << "m.squaredNorm() = " << m.squaredNorm() << endl; + cout << "m.norm() = " << m.norm() << endl; + cout << "m.lpNorm<1>() = " << m.lpNorm<1>() << endl; + cout << "m.lpNorm() = " << m.lpNorm() << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp new file mode 100644 index 000000000..62e28fc31 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp @@ -0,0 +1,18 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + MatrixXf m(2,2); + m << 1,-2, + -3,4; + + cout << "1-norm(m) = " << m.cwiseAbs().colwise().sum().maxCoeff() + << " == " << m.colwise().lpNorm<1>().maxCoeff() << endl; + + cout << "infty-norm(m) = " << 
m.cwiseAbs().rowwise().sum().maxCoeff() + << " == " << m.rowwise().lpNorm<1>().maxCoeff() << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_rowwise.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_rowwise.cpp new file mode 100644 index 000000000..80427c9f7 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_rowwise.cpp @@ -0,0 +1,13 @@ +#include +#include + +using namespace std; +int main() +{ + Eigen::MatrixXf mat(2,4); + mat << 1, 2, 6, 9, + 3, 1, 7, 2; + + std::cout << "Row's maximum: " << std::endl + << mat.rowwise().maxCoeff() << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp new file mode 100644 index 000000000..b54e9aa31 --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_visitors.cpp @@ -0,0 +1,26 @@ +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + Eigen::MatrixXf m(2,2); + + m << 1, 2, + 3, 4; + + //get location of maximum + MatrixXf::Index maxRow, maxCol; + float max = m.maxCoeff(&maxRow, &maxCol); + + //get location of minimum + MatrixXf::Index minRow, minCol; + float min = m.minCoeff(&minRow, &minCol); + + cout << "Max: " << max << ", at: " << + maxRow << "," << maxCol << endl; + cout << "Min: " << min << ", at: " << + minRow << "," << minCol << endl; +} diff --git a/thirdparty/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp b/thirdparty/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp new file mode 100644 index 000000000..0f0280e0e --- /dev/null +++ b/thirdparty/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp @@ -0,0 +1,22 @@ +#include +#include + +using namespace Eigen; + +int main() +{ + for (int size=1; size<=4; ++size) + { + MatrixXi m(size,size+1); // a (size)x(size+1)-matrix of int's 
+ for (int j=0; j +#include + +using namespace Eigen; + +int main() +{ + Matrix3f m3; + m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9; + Matrix4f m4 = Matrix4f::Identity(); + Vector4i v4(1, 2, 3, 4); + + std::cout << "m3\n" << m3 << "\nm4:\n" + << m4 << "\nv4:\n" << v4 << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/class_Block.cpp b/thirdparty/eigen/doc/examples/class_Block.cpp new file mode 100644 index 000000000..ace719afc --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_Block.cpp @@ -0,0 +1,27 @@ +#include +#include +using namespace Eigen; +using namespace std; + +template +Eigen::Block +topLeftCorner(MatrixBase& m, int rows, int cols) +{ + return Eigen::Block(m.derived(), 0, 0, rows, cols); +} + +template +const Eigen::Block +topLeftCorner(const MatrixBase& m, int rows, int cols) +{ + return Eigen::Block(m.derived(), 0, 0, rows, cols); +} + +int main(int, char**) +{ + Matrix4d m = Matrix4d::Identity(); + cout << topLeftCorner(4*m, 2, 3) << endl; // calls the const version + topLeftCorner(m, 2, 3) *= 5; // calls the non-const version + cout << "Now the matrix m is:" << endl << m << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/class_CwiseBinaryOp.cpp b/thirdparty/eigen/doc/examples/class_CwiseBinaryOp.cpp new file mode 100644 index 000000000..682af46de --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_CwiseBinaryOp.cpp @@ -0,0 +1,18 @@ +#include +#include +using namespace Eigen; +using namespace std; + +// define a custom template binary functor +template struct MakeComplexOp { + EIGEN_EMPTY_STRUCT_CTOR(MakeComplexOp) + typedef complex result_type; + complex operator()(const Scalar& a, const Scalar& b) const { return complex(a,b); } +}; + +int main(int, char**) +{ + Matrix4d m1 = Matrix4d::Random(), m2 = Matrix4d::Random(); + cout << m1.binaryExpr(m2, MakeComplexOp()) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/class_CwiseUnaryOp.cpp b/thirdparty/eigen/doc/examples/class_CwiseUnaryOp.cpp new file mode 100644 
index 000000000..a5fcc153d --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_CwiseUnaryOp.cpp @@ -0,0 +1,19 @@ +#include +#include +using namespace Eigen; +using namespace std; + +// define a custom template unary functor +template +struct CwiseClampOp { + CwiseClampOp(const Scalar& inf, const Scalar& sup) : m_inf(inf), m_sup(sup) {} + const Scalar operator()(const Scalar& x) const { return xm_sup ? m_sup : x); } + Scalar m_inf, m_sup; +}; + +int main(int, char**) +{ + Matrix4d m1 = Matrix4d::Random(); + cout << m1 << endl << "becomes: " << endl << m1.unaryExpr(CwiseClampOp(-0.5,0.5)) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp b/thirdparty/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp new file mode 100644 index 000000000..36706d8ed --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_CwiseUnaryOp_ptrfun.cpp @@ -0,0 +1,20 @@ +#include +#include +using namespace Eigen; +using namespace std; + +// define function to be applied coefficient-wise +double ramp(double x) +{ + if (x > 0) + return x; + else + return 0; +} + +int main(int, char**) +{ + Matrix4d m1 = Matrix4d::Random(); + cout << m1 << endl << "becomes: " << endl << m1.unaryExpr(ptr_fun(ramp)) << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/class_FixedBlock.cpp b/thirdparty/eigen/doc/examples/class_FixedBlock.cpp new file mode 100644 index 000000000..9978b32e8 --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_FixedBlock.cpp @@ -0,0 +1,27 @@ +#include +#include +using namespace Eigen; +using namespace std; + +template +Eigen::Block +topLeft2x2Corner(MatrixBase& m) +{ + return Eigen::Block(m.derived(), 0, 0); +} + +template +const Eigen::Block +topLeft2x2Corner(const MatrixBase& m) +{ + return Eigen::Block(m.derived(), 0, 0); +} + +int main(int, char**) +{ + Matrix3d m = Matrix3d::Identity(); + cout << topLeft2x2Corner(4*m) << endl; // calls the const version + topLeft2x2Corner(m) *= 2; // calls the non-const version + 
cout << "Now the matrix m is:" << endl << m << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/class_FixedVectorBlock.cpp b/thirdparty/eigen/doc/examples/class_FixedVectorBlock.cpp new file mode 100644 index 000000000..c88c9fbf1 --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_FixedVectorBlock.cpp @@ -0,0 +1,27 @@ +#include +#include +using namespace Eigen; +using namespace std; + +template +Eigen::VectorBlock +firstTwo(MatrixBase& v) +{ + return Eigen::VectorBlock(v.derived(), 0); +} + +template +const Eigen::VectorBlock +firstTwo(const MatrixBase& v) +{ + return Eigen::VectorBlock(v.derived(), 0); +} + +int main(int, char**) +{ + Matrix v; v << 1,2,3,4,5,6; + cout << firstTwo(4*v) << endl; // calls the const version + firstTwo(v) *= 2; // calls the non-const version + cout << "Now the vector v is:" << endl << v << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/class_VectorBlock.cpp b/thirdparty/eigen/doc/examples/class_VectorBlock.cpp new file mode 100644 index 000000000..dc213df20 --- /dev/null +++ b/thirdparty/eigen/doc/examples/class_VectorBlock.cpp @@ -0,0 +1,27 @@ +#include +#include +using namespace Eigen; +using namespace std; + +template +Eigen::VectorBlock +segmentFromRange(MatrixBase& v, int start, int end) +{ + return Eigen::VectorBlock(v.derived(), start, end-start); +} + +template +const Eigen::VectorBlock +segmentFromRange(const MatrixBase& v, int start, int end) +{ + return Eigen::VectorBlock(v.derived(), start, end-start); +} + +int main(int, char**) +{ + Matrix v; v << 1,2,3,4,5,6; + cout << segmentFromRange(2*v, 2, 4) << endl; // calls the const version + segmentFromRange(v, 1, 3) *= 5; // calls the non-const version + cout << "Now the vector v is:" << endl << v << endl; + return 0; +} diff --git a/thirdparty/eigen/doc/examples/function_taking_eigenbase.cpp b/thirdparty/eigen/doc/examples/function_taking_eigenbase.cpp new file mode 100644 index 000000000..49d94b3d6 --- /dev/null +++ 
b/thirdparty/eigen/doc/examples/function_taking_eigenbase.cpp @@ -0,0 +1,18 @@ +#include +#include +using namespace Eigen; + +template +void print_size(const EigenBase& b) +{ + std::cout << "size (rows, cols): " << b.size() << " (" << b.rows() + << ", " << b.cols() << ")" << std::endl; +} + +int main() +{ + Vector3f v; + print_size(v); + // v.asDiagonal() returns a 3x3 diagonal matrix pseudo-expression + print_size(v.asDiagonal()); +} diff --git a/thirdparty/eigen/doc/examples/function_taking_ref.cpp b/thirdparty/eigen/doc/examples/function_taking_ref.cpp new file mode 100644 index 000000000..162a202e4 --- /dev/null +++ b/thirdparty/eigen/doc/examples/function_taking_ref.cpp @@ -0,0 +1,19 @@ +#include +#include +using namespace Eigen; +using namespace std; + +float inv_cond(const Ref& a) +{ + const VectorXf sing_vals = a.jacobiSvd().singularValues(); + return sing_vals(sing_vals.size()-1) / sing_vals(0); +} + +int main() +{ + Matrix4f m = Matrix4f::Random(); + cout << "matrix m:" << endl << m << endl << endl; + cout << "inv_cond(m): " << inv_cond(m) << endl; + cout << "inv_cond(m(1:3,1:3)): " << inv_cond(m.topLeftCorner(3,3)) << endl; + cout << "inv_cond(m+I): " << inv_cond(m+Matrix4f::Identity()) << endl; +} diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp b/thirdparty/eigen/doc/examples/make_circulant.cpp new file mode 100644 index 000000000..92e6aaa2b --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp @@ -0,0 +1,11 @@ +/* +This program is presented in several fragments in the doc page. +Every fragment is in its own file; this file simply combines them. 
+*/ + +#include "make_circulant.cpp.preamble" +#include "make_circulant.cpp.traits" +#include "make_circulant.cpp.expression" +#include "make_circulant.cpp.evaluator" +#include "make_circulant.cpp.entry" +#include "make_circulant.cpp.main" diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp.entry b/thirdparty/eigen/doc/examples/make_circulant.cpp.entry new file mode 100644 index 000000000..f9d2eb8a9 --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp.entry @@ -0,0 +1,5 @@ +template +Circulant makeCirculant(const Eigen::MatrixBase& arg) +{ + return Circulant(arg.derived()); +} diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp.evaluator b/thirdparty/eigen/doc/examples/make_circulant.cpp.evaluator new file mode 100644 index 000000000..2ba79e783 --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp.evaluator @@ -0,0 +1,32 @@ +namespace Eigen { + namespace internal { + template + struct evaluator > + : evaluator_base > + { + typedef Circulant XprType; + typedef typename nested_eval::type ArgTypeNested; + typedef typename remove_all::type ArgTypeNestedCleaned; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = Eigen::ColMajor + }; + + evaluator(const XprType& xpr) + : m_argImpl(xpr.m_arg), m_rows(xpr.rows()) + { } + + CoeffReturnType coeff(Index row, Index col) const + { + Index index = row - col; + if (index < 0) index += m_rows; + return m_argImpl.coeff(index); + } + + evaluator m_argImpl; + const Index m_rows; + }; + } +} diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp.expression b/thirdparty/eigen/doc/examples/make_circulant.cpp.expression new file mode 100644 index 000000000..380cd4450 --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp.expression @@ -0,0 +1,20 @@ +template +class Circulant : public Eigen::MatrixBase > +{ +public: + Circulant(const ArgType& arg) + : m_arg(arg) + { + 
EIGEN_STATIC_ASSERT(ArgType::ColsAtCompileTime == 1, + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); + } + + typedef typename Eigen::internal::ref_selector::type Nested; + + typedef Eigen::Index Index; + Index rows() const { return m_arg.rows(); } + Index cols() const { return m_arg.rows(); } + + typedef typename Eigen::internal::ref_selector::type ArgTypeNested; + ArgTypeNested m_arg; +}; diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp.main b/thirdparty/eigen/doc/examples/make_circulant.cpp.main new file mode 100644 index 000000000..877f97f62 --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp.main @@ -0,0 +1,8 @@ +int main() +{ + Eigen::VectorXd vec(4); + vec << 1, 2, 4, 8; + Eigen::MatrixXd mat; + mat = makeCirculant(vec); + std::cout << mat << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp.preamble b/thirdparty/eigen/doc/examples/make_circulant.cpp.preamble new file mode 100644 index 000000000..e575cce14 --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp.preamble @@ -0,0 +1,4 @@ +#include +#include + +template class Circulant; diff --git a/thirdparty/eigen/doc/examples/make_circulant.cpp.traits b/thirdparty/eigen/doc/examples/make_circulant.cpp.traits new file mode 100644 index 000000000..4e04535d3 --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant.cpp.traits @@ -0,0 +1,19 @@ +namespace Eigen { + namespace internal { + template + struct traits > + { + typedef Eigen::Dense StorageKind; + typedef Eigen::MatrixXpr XprKind; + typedef typename ArgType::StorageIndex StorageIndex; + typedef typename ArgType::Scalar Scalar; + enum { + Flags = Eigen::ColMajor, + RowsAtCompileTime = ArgType::RowsAtCompileTime, + ColsAtCompileTime = ArgType::RowsAtCompileTime, + MaxRowsAtCompileTime = ArgType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = ArgType::MaxRowsAtCompileTime + }; + }; + } +} diff --git a/thirdparty/eigen/doc/examples/make_circulant2.cpp 
b/thirdparty/eigen/doc/examples/make_circulant2.cpp new file mode 100644 index 000000000..95d3dd31a --- /dev/null +++ b/thirdparty/eigen/doc/examples/make_circulant2.cpp @@ -0,0 +1,52 @@ +#include +#include + +using namespace Eigen; + +// [circulant_func] +template +class circulant_functor { + const ArgType &m_vec; +public: + circulant_functor(const ArgType& arg) : m_vec(arg) {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + Index index = row - col; + if (index < 0) index += m_vec.size(); + return m_vec(index); + } +}; +// [circulant_func] + +// [square] +template +struct circulant_helper { + typedef Matrix MatrixType; +}; +// [square] + +// [makeCirculant] +template +CwiseNullaryOp, typename circulant_helper::MatrixType> +makeCirculant(const Eigen::MatrixBase& arg) +{ + typedef typename circulant_helper::MatrixType MatrixType; + return MatrixType::NullaryExpr(arg.size(), arg.size(), circulant_functor(arg.derived())); +} +// [makeCirculant] + +// [main] +int main() +{ + Eigen::VectorXd vec(4); + vec << 1, 2, 4, 8; + Eigen::MatrixXd mat; + mat = makeCirculant(vec); + std::cout << mat << std::endl; +} +// [main] diff --git a/thirdparty/eigen/doc/examples/matrixfree_cg.cpp b/thirdparty/eigen/doc/examples/matrixfree_cg.cpp new file mode 100644 index 000000000..6a205aea3 --- /dev/null +++ b/thirdparty/eigen/doc/examples/matrixfree_cg.cpp @@ -0,0 +1,128 @@ +#include +#include +#include +#include +#include + +class MatrixReplacement; +using Eigen::SparseMatrix; + +namespace Eigen { +namespace internal { + // MatrixReplacement looks-like a SparseMatrix, so let's inherits its traits: + template<> + struct traits : public Eigen::internal::traits > + {}; +} +} + +// Example of a matrix-free wrapper from a user type to Eigen's compatible type +// For the sake of simplicity, this example simply wrap a Eigen::SparseMatrix. 
+class MatrixReplacement : public Eigen::EigenBase { +public: + // Required typedefs, constants, and method: + typedef double Scalar; + typedef double RealScalar; + typedef int StorageIndex; + enum { + ColsAtCompileTime = Eigen::Dynamic, + MaxColsAtCompileTime = Eigen::Dynamic, + IsRowMajor = false + }; + + Index rows() const { return mp_mat->rows(); } + Index cols() const { return mp_mat->cols(); } + + template + Eigen::Product operator*(const Eigen::MatrixBase& x) const { + return Eigen::Product(*this, x.derived()); + } + + // Custom API: + MatrixReplacement() : mp_mat(0) {} + + void attachMyMatrix(const SparseMatrix &mat) { + mp_mat = &mat; + } + const SparseMatrix my_matrix() const { return *mp_mat; } + +private: + const SparseMatrix *mp_mat; +}; + + +// Implementation of MatrixReplacement * Eigen::DenseVector though a specialization of internal::generic_product_impl: +namespace Eigen { +namespace internal { + + template + struct generic_product_impl // GEMV stands for matrix-vector + : generic_product_impl_base > + { + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const MatrixReplacement& lhs, const Rhs& rhs, const Scalar& alpha) + { + // This method should implement "dst += alpha * lhs * rhs" inplace, + // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it. 
+ assert(alpha==Scalar(1) && "scaling is not implemented"); + + // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs, + // but let's do something fancier (and less efficient): + for(Index i=0; i S = Eigen::MatrixXd::Random(n,n).sparseView(0.5,1); + S = S.transpose()*S; + + MatrixReplacement A; + A.attachMyMatrix(S); + + Eigen::VectorXd b(n), x; + b.setRandom(); + + // Solve Ax = b using various iterative solver with matrix-free version: + { + Eigen::ConjugateGradient cg; + cg.compute(A); + x = cg.solve(b); + std::cout << "CG: #iterations: " << cg.iterations() << ", estimated error: " << cg.error() << std::endl; + } + + { + Eigen::BiCGSTAB bicg; + bicg.compute(A); + x = bicg.solve(b); + std::cout << "BiCGSTAB: #iterations: " << bicg.iterations() << ", estimated error: " << bicg.error() << std::endl; + } + + { + Eigen::GMRES gmres; + gmres.compute(A); + x = gmres.solve(b); + std::cout << "GMRES: #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl; + } + + { + Eigen::DGMRES gmres; + gmres.compute(A); + x = gmres.solve(b); + std::cout << "DGMRES: #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl; + } + + { + Eigen::MINRES minres; + minres.compute(A); + x = minres.solve(b); + std::cout << "MINRES: #iterations: " << minres.iterations() << ", estimated error: " << minres.error() << std::endl; + } +} diff --git a/thirdparty/eigen/doc/examples/nullary_indexing.cpp b/thirdparty/eigen/doc/examples/nullary_indexing.cpp new file mode 100644 index 000000000..e27c3585a --- /dev/null +++ b/thirdparty/eigen/doc/examples/nullary_indexing.cpp @@ -0,0 +1,66 @@ +#include +#include + +using namespace Eigen; + +// [functor] +template +class indexing_functor { + const ArgType &m_arg; + const RowIndexType &m_rowIndices; + const ColIndexType &m_colIndices; +public: + typedef Matrix MatrixType; + + indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& 
col_indices) + : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices) + {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + return m_arg(m_rowIndices[row], m_colIndices[col]); + } +}; +// [functor] + +// [function] +template +CwiseNullaryOp, typename indexing_functor::MatrixType> +indexing(const Eigen::MatrixBase& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) +{ + typedef indexing_functor Func; + typedef typename Func::MatrixType MatrixType; + return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices)); +} +// [function] + + +int main() +{ + std::cout << "[main1]\n"; + Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4); + Array3i ri(1,2,1); + ArrayXi ci(6); ci << 3,2,1,0,0,2; + Eigen::MatrixXi B = indexing(A, ri, ci); + std::cout << "A =" << std::endl; + std::cout << A << std::endl << std::endl; + std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl; + std::cout << B << std::endl; + std::cout << "[main1]\n"; + + std::cout << "[main2]\n"; + B = indexing(A, ri+1, ci); + std::cout << "A(ri+1,ci) =" << std::endl; + std::cout << B << std::endl << std::endl; +#if __cplusplus >= 201103L + B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); + std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl; + std::cout << B << std::endl << std::endl; +#endif + std::cout << "[main2]\n"; +} + diff --git a/thirdparty/eigen/doc/examples/tut_arithmetic_add_sub.cpp b/thirdparty/eigen/doc/examples/tut_arithmetic_add_sub.cpp new file mode 100644 index 000000000..e97477b6e --- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_arithmetic_add_sub.cpp @@ -0,0 +1,22 @@ +#include +#include + +using namespace Eigen; + +int main() +{ + Matrix2d a; + a << 1, 2, + 3, 4; + MatrixXd b(2,2); + b << 2, 3, + 1, 4; + std::cout << 
"a + b =\n" << a + b << std::endl; + std::cout << "a - b =\n" << a - b << std::endl; + std::cout << "Doing a += b;" << std::endl; + a += b; + std::cout << "Now a =\n" << a << std::endl; + Vector3d v(1,2,3); + Vector3d w(1,0,0); + std::cout << "-v + w - v =\n" << -v + w - v << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_arithmetic_dot_cross.cpp b/thirdparty/eigen/doc/examples/tut_arithmetic_dot_cross.cpp new file mode 100644 index 000000000..631c9a5e0 --- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_arithmetic_dot_cross.cpp @@ -0,0 +1,15 @@ +#include +#include + +using namespace Eigen; +using namespace std; +int main() +{ + Vector3d v(1,2,3); + Vector3d w(0,1,2); + + cout << "Dot product: " << v.dot(w) << endl; + double dp = v.adjoint()*w; // automatic conversion of the inner product to a scalar + cout << "Dot product via a matrix product: " << dp << endl; + cout << "Cross product:\n" << v.cross(w) << endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp b/thirdparty/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp new file mode 100644 index 000000000..f21390241 --- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_arithmetic_matrix_mul.cpp @@ -0,0 +1,19 @@ +#include +#include + +using namespace Eigen; +int main() +{ + Matrix2d mat; + mat << 1, 2, + 3, 4; + Vector2d u(-1,1), v(2,0); + std::cout << "Here is mat*mat:\n" << mat*mat << std::endl; + std::cout << "Here is mat*u:\n" << mat*u << std::endl; + std::cout << "Here is u^T*mat:\n" << u.transpose()*mat << std::endl; + std::cout << "Here is u^T*v:\n" << u.transpose()*v << std::endl; + std::cout << "Here is u*v^T:\n" << u*v.transpose() << std::endl; + std::cout << "Let's multiply mat by itself" << std::endl; + mat = mat*mat; + std::cout << "Now mat is mat:\n" << mat << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_arithmetic_redux_basic.cpp b/thirdparty/eigen/doc/examples/tut_arithmetic_redux_basic.cpp new file mode 100644 index 000000000..5632fb52e 
--- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_arithmetic_redux_basic.cpp @@ -0,0 +1,16 @@ +#include +#include + +using namespace std; +int main() +{ + Eigen::Matrix2d mat; + mat << 1, 2, + 3, 4; + cout << "Here is mat.sum(): " << mat.sum() << endl; + cout << "Here is mat.prod(): " << mat.prod() << endl; + cout << "Here is mat.mean(): " << mat.mean() << endl; + cout << "Here is mat.minCoeff(): " << mat.minCoeff() << endl; + cout << "Here is mat.maxCoeff(): " << mat.maxCoeff() << endl; + cout << "Here is mat.trace(): " << mat.trace() << endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp b/thirdparty/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp new file mode 100644 index 000000000..d5f65b53e --- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_arithmetic_scalar_mul_div.cpp @@ -0,0 +1,17 @@ +#include +#include + +using namespace Eigen; + +int main() +{ + Matrix2d a; + a << 1, 2, + 3, 4; + Vector3d v(1,2,3); + std::cout << "a * 2.5 =\n" << a * 2.5 << std::endl; + std::cout << "0.1 * v =\n" << 0.1 * v << std::endl; + std::cout << "Doing v *= 2;" << std::endl; + v *= 2; + std::cout << "Now v =\n" << v << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp b/thirdparty/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp new file mode 100644 index 000000000..c2da17158 --- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_matrix_coefficient_accessors.cpp @@ -0,0 +1,18 @@ +#include +#include + +using namespace Eigen; + +int main() +{ + MatrixXd m(2,2); + m(0,0) = 3; + m(1,0) = 2.5; + m(0,1) = -1; + m(1,1) = m(1,0) + m(0,1); + std::cout << "Here is the matrix m:\n" << m << std::endl; + VectorXd v(2); + v(0) = 4; + v(1) = v(0) - 1; + std::cout << "Here is the vector v:\n" << v << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_matrix_resize.cpp b/thirdparty/eigen/doc/examples/tut_matrix_resize.cpp new file mode 100644 index 000000000..0392c3aa5 --- /dev/null +++ 
b/thirdparty/eigen/doc/examples/tut_matrix_resize.cpp @@ -0,0 +1,18 @@ +#include +#include + +using namespace Eigen; + +int main() +{ + MatrixXd m(2,5); + m.resize(4,3); + std::cout << "The matrix m is of size " + << m.rows() << "x" << m.cols() << std::endl; + std::cout << "It has " << m.size() << " coefficients" << std::endl; + VectorXd v(2); + v.resize(5); + std::cout << "The vector v is of size " << v.size() << std::endl; + std::cout << "As a matrix, v is of size " + << v.rows() << "x" << v.cols() << std::endl; +} diff --git a/thirdparty/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp b/thirdparty/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp new file mode 100644 index 000000000..dcbdfa783 --- /dev/null +++ b/thirdparty/eigen/doc/examples/tut_matrix_resize_fixed_size.cpp @@ -0,0 +1,12 @@ +#include +#include + +using namespace Eigen; + +int main() +{ + Matrix4d m; + m.resize(4,4); // no operation + std::cout << "The matrix m is of size " + << m.rows() << "x" << m.cols() << std::endl; +} diff --git a/thirdparty/eigen/doc/ftv2node.png b/thirdparty/eigen/doc/ftv2node.png new file mode 100644 index 0000000000000000000000000000000000000000..63c605bb4c3d941c921a4b6cfa74951e946bcb48 GIT binary patch literal 86 zcmeAS@N?(olHy`uVBq!ia0vp^0zfRr!3HExu9B$%QnH>djv*C{Z|`mdau^P8_z}#X h?B8GEpdi4(BFDx$je&7RrDQEg&ePS;Wt~$(69Dh@6T1Ka literal 0 HcmV?d00001 diff --git a/thirdparty/eigen/doc/ftv2pnode.png b/thirdparty/eigen/doc/ftv2pnode.png new file mode 100644 index 0000000000000000000000000000000000000000..c6ee22f937a07d1dbfc27c669d11f8ed13e2f152 GIT binary patch literal 229 zcmV^P)R?RzRoKvklcaQ%HF6%rK2&ZgO(-ihJ_C zzrKgp4jgO( fd_(yg|3PpEQb#9`a?Pz_00000NkvXXu0mjftR`5K literal 0 HcmV?d00001 diff --git a/thirdparty/eigen/doc/snippets/.krazy b/thirdparty/eigen/doc/snippets/.krazy new file mode 100644 index 000000000..00b99405d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/.krazy @@ -0,0 +1,2 @@ +EXCLUDE copyright +EXCLUDE license diff --git 
a/thirdparty/eigen/doc/snippets/AngleAxis_mimic_euler.cpp b/thirdparty/eigen/doc/snippets/AngleAxis_mimic_euler.cpp new file mode 100644 index 000000000..456de7f7e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/AngleAxis_mimic_euler.cpp @@ -0,0 +1,5 @@ +Matrix3f m; +m = AngleAxisf(0.25*M_PI, Vector3f::UnitX()) + * AngleAxisf(0.5*M_PI, Vector3f::UnitY()) + * AngleAxisf(0.33*M_PI, Vector3f::UnitZ()); +cout << m << endl << "is unitary: " << m.isUnitary() << endl; diff --git a/thirdparty/eigen/doc/snippets/BiCGSTAB_simple.cpp b/thirdparty/eigen/doc/snippets/BiCGSTAB_simple.cpp new file mode 100644 index 000000000..5520f4f1f --- /dev/null +++ b/thirdparty/eigen/doc/snippets/BiCGSTAB_simple.cpp @@ -0,0 +1,11 @@ + int n = 10000; + VectorXd x(n), b(n); + SparseMatrix A(n,n); + /* ... fill A and b ... */ + BiCGSTAB > solver; + solver.compute(A); + x = solver.solve(b); + std::cout << "#iterations: " << solver.iterations() << std::endl; + std::cout << "estimated error: " << solver.error() << std::endl; + /* ... update b ... */ + x = solver.solve(b); // solve again \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/BiCGSTAB_step_by_step.cpp b/thirdparty/eigen/doc/snippets/BiCGSTAB_step_by_step.cpp new file mode 100644 index 000000000..06147bb81 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/BiCGSTAB_step_by_step.cpp @@ -0,0 +1,14 @@ + int n = 10000; + VectorXd x(n), b(n); + SparseMatrix A(n,n); + /* ... fill A and b ... 
*/ + BiCGSTAB > solver(A); + // start from a random solution + x = VectorXd::Random(n); + solver.setMaxIterations(1); + int i = 0; + do { + x = solver.solveWithGuess(b,x); + std::cout << i << " : " << solver.error() << std::endl; + ++i; + } while (solver.info()!=Success && i<100); \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/CMakeLists.txt b/thirdparty/eigen/doc/snippets/CMakeLists.txt new file mode 100644 index 000000000..1baf32fba --- /dev/null +++ b/thirdparty/eigen/doc/snippets/CMakeLists.txt @@ -0,0 +1,26 @@ +file(GLOB snippets_SRCS "*.cpp") + +add_custom_target(all_snippets) + +foreach(snippet_src ${snippets_SRCS}) + get_filename_component(snippet ${snippet_src} NAME_WE) + set(compile_snippet_target compile_${snippet}) + set(compile_snippet_src ${compile_snippet_target}.cpp) + file(READ ${snippet_src} snippet_source_code) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/compile_snippet.cpp.in + ${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src}) + add_executable(${compile_snippet_target} + ${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src}) + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${compile_snippet_target} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() + add_custom_command( + TARGET ${compile_snippet_target} + POST_BUILD + COMMAND ${compile_snippet_target} + ARGS >${CMAKE_CURRENT_BINARY_DIR}/${snippet}.out + ) + add_dependencies(all_snippets ${compile_snippet_target}) + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src} + PROPERTIES OBJECT_DEPENDS ${snippet_src}) +endforeach(snippet_src) diff --git a/thirdparty/eigen/doc/snippets/ColPivHouseholderQR_solve.cpp b/thirdparty/eigen/doc/snippets/ColPivHouseholderQR_solve.cpp new file mode 100644 index 000000000..b7b204a18 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ColPivHouseholderQR_solve.cpp @@ -0,0 +1,8 @@ +Matrix3f m = Matrix3f::Random(); +Matrix3f y = Matrix3f::Random(); +cout << "Here is the matrix m:" << endl << m << endl; 
+cout << "Here is the matrix y:" << endl << y << endl; +Matrix3f x; +x = m.colPivHouseholderQr().solve(y); +assert(y.isApprox(m*x)); +cout << "Here is a solution x to the equation mx=y:" << endl << x << endl; diff --git a/thirdparty/eigen/doc/snippets/ComplexEigenSolver_compute.cpp b/thirdparty/eigen/doc/snippets/ComplexEigenSolver_compute.cpp new file mode 100644 index 000000000..11d6bd399 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ComplexEigenSolver_compute.cpp @@ -0,0 +1,16 @@ +MatrixXcf A = MatrixXcf::Random(4,4); +cout << "Here is a random 4x4 matrix, A:" << endl << A << endl << endl; + +ComplexEigenSolver ces; +ces.compute(A); +cout << "The eigenvalues of A are:" << endl << ces.eigenvalues() << endl; +cout << "The matrix of eigenvectors, V, is:" << endl << ces.eigenvectors() << endl << endl; + +complex lambda = ces.eigenvalues()[0]; +cout << "Consider the first eigenvalue, lambda = " << lambda << endl; +VectorXcf v = ces.eigenvectors().col(0); +cout << "If v is the corresponding eigenvector, then lambda * v = " << endl << lambda * v << endl; +cout << "... 
and A * v = " << endl << A * v << endl << endl; + +cout << "Finally, V * D * V^(-1) = " << endl + << ces.eigenvectors() * ces.eigenvalues().asDiagonal() * ces.eigenvectors().inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvalues.cpp b/thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvalues.cpp new file mode 100644 index 000000000..5509bd897 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvalues.cpp @@ -0,0 +1,4 @@ +MatrixXcf ones = MatrixXcf::Ones(3,3); +ComplexEigenSolver ces(ones, /* computeEigenvectors = */ false); +cout << "The eigenvalues of the 3x3 matrix of ones are:" + << endl << ces.eigenvalues() << endl; diff --git a/thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvectors.cpp b/thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvectors.cpp new file mode 100644 index 000000000..bb1c2ccf1 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ComplexEigenSolver_eigenvectors.cpp @@ -0,0 +1,4 @@ +MatrixXcf ones = MatrixXcf::Ones(3,3); +ComplexEigenSolver ces(ones); +cout << "The first eigenvector of the 3x3 matrix of ones is:" + << endl << ces.eigenvectors().col(1) << endl; diff --git a/thirdparty/eigen/doc/snippets/ComplexSchur_compute.cpp b/thirdparty/eigen/doc/snippets/ComplexSchur_compute.cpp new file mode 100644 index 000000000..3a5170101 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ComplexSchur_compute.cpp @@ -0,0 +1,6 @@ +MatrixXcf A = MatrixXcf::Random(4,4); +ComplexSchur schur(4); +schur.compute(A); +cout << "The matrix T in the decomposition of A is:" << endl << schur.matrixT() << endl; +schur.compute(A.inverse()); +cout << "The matrix T in the decomposition of A^(-1) is:" << endl << schur.matrixT() << endl; diff --git a/thirdparty/eigen/doc/snippets/ComplexSchur_matrixT.cpp b/thirdparty/eigen/doc/snippets/ComplexSchur_matrixT.cpp new file mode 100644 index 000000000..8380571ac --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ComplexSchur_matrixT.cpp @@ -0,0 +1,4 @@ 
+MatrixXcf A = MatrixXcf::Random(4,4); +cout << "Here is a random 4x4 matrix, A:" << endl << A << endl << endl; +ComplexSchur schurOfA(A, false); // false means do not compute U +cout << "The triangular matrix T is:" << endl << schurOfA.matrixT() << endl; diff --git a/thirdparty/eigen/doc/snippets/ComplexSchur_matrixU.cpp b/thirdparty/eigen/doc/snippets/ComplexSchur_matrixU.cpp new file mode 100644 index 000000000..ba3d9c22e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/ComplexSchur_matrixU.cpp @@ -0,0 +1,4 @@ +MatrixXcf A = MatrixXcf::Random(4,4); +cout << "Here is a random 4x4 matrix, A:" << endl << A << endl << endl; +ComplexSchur schurOfA(A); +cout << "The unitary matrix U is:" << endl << schurOfA.matrixU() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_abs.cpp b/thirdparty/eigen/doc/snippets/Cwise_abs.cpp new file mode 100644 index 000000000..0aeec3a40 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_abs.cpp @@ -0,0 +1,2 @@ +Array3d v(1,-2,-3); +cout << v.abs() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_abs2.cpp b/thirdparty/eigen/doc/snippets/Cwise_abs2.cpp new file mode 100644 index 000000000..2c4f9b344 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_abs2.cpp @@ -0,0 +1,2 @@ +Array3d v(1,-2,-3); +cout << v.abs2() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_acos.cpp b/thirdparty/eigen/doc/snippets/Cwise_acos.cpp new file mode 100644 index 000000000..34432cbac --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_acos.cpp @@ -0,0 +1,2 @@ +Array3d v(0, sqrt(2.)/2, 1); +cout << v.acos() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_arg.cpp b/thirdparty/eigen/doc/snippets/Cwise_arg.cpp new file mode 100644 index 000000000..3f45133b6 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_arg.cpp @@ -0,0 +1,3 @@ +ArrayXcf v = ArrayXcf::Random(3); +cout << v << endl << endl; +cout << arg(v) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_array_power_array.cpp 
b/thirdparty/eigen/doc/snippets/Cwise_array_power_array.cpp new file mode 100644 index 000000000..432a76ee5 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_array_power_array.cpp @@ -0,0 +1,4 @@ +Array x(8,25,3), + e(1./3.,0.5,2.); +cout << "[" << x << "]^[" << e << "] = " << x.pow(e) << endl; // using ArrayBase::pow +cout << "[" << x << "]^[" << e << "] = " << pow(x,e) << endl; // using Eigen::pow diff --git a/thirdparty/eigen/doc/snippets/Cwise_asin.cpp b/thirdparty/eigen/doc/snippets/Cwise_asin.cpp new file mode 100644 index 000000000..8dad838fd --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_asin.cpp @@ -0,0 +1,2 @@ +Array3d v(0, sqrt(2.)/2, 1); +cout << v.asin() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_atan.cpp b/thirdparty/eigen/doc/snippets/Cwise_atan.cpp new file mode 100644 index 000000000..446844726 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_atan.cpp @@ -0,0 +1,2 @@ +ArrayXd v = ArrayXd::LinSpaced(5,0,1); +cout << v.atan() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_boolean_and.cpp b/thirdparty/eigen/doc/snippets/Cwise_boolean_and.cpp new file mode 100644 index 000000000..df6b60d92 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_boolean_and.cpp @@ -0,0 +1,2 @@ +Array3d v(-1,2,1), w(-3,2,3); +cout << ((vw) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_greater_equal.cpp b/thirdparty/eigen/doc/snippets/Cwise_greater_equal.cpp new file mode 100644 index 000000000..6a08f8948 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_greater_equal.cpp @@ -0,0 +1,2 @@ +Array3d v(1,2,3), w(3,2,1); +cout << (v>=w) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_inverse.cpp b/thirdparty/eigen/doc/snippets/Cwise_inverse.cpp new file mode 100644 index 000000000..3967a7ecf --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_inverse.cpp @@ -0,0 +1,2 @@ +Array3d v(2,3,4); +cout << v.inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_isFinite.cpp 
b/thirdparty/eigen/doc/snippets/Cwise_isFinite.cpp new file mode 100644 index 000000000..1da55fd16 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_isFinite.cpp @@ -0,0 +1,5 @@ +Array3d v(1,2,3); +v(1) *= 0.0/0.0; +v(2) /= 0.0; +cout << v << endl << endl; +cout << isfinite(v) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_isInf.cpp b/thirdparty/eigen/doc/snippets/Cwise_isInf.cpp new file mode 100644 index 000000000..be793081c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_isInf.cpp @@ -0,0 +1,5 @@ +Array3d v(1,2,3); +v(1) *= 0.0/0.0; +v(2) /= 0.0; +cout << v << endl << endl; +cout << isinf(v) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_isNaN.cpp b/thirdparty/eigen/doc/snippets/Cwise_isNaN.cpp new file mode 100644 index 000000000..7b2a93082 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_isNaN.cpp @@ -0,0 +1,5 @@ +Array3d v(1,2,3); +v(1) *= 0.0/0.0; +v(2) /= 0.0; +cout << v << endl << endl; +cout << isnan(v) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_less.cpp b/thirdparty/eigen/doc/snippets/Cwise_less.cpp new file mode 100644 index 000000000..cafd3b6e0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_less.cpp @@ -0,0 +1,2 @@ +Array3d v(1,2,3), w(3,2,1); +cout << (v e(2,-3,1./3.); +cout << "10^[" << e << "] = " << pow(10,e) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_sign.cpp b/thirdparty/eigen/doc/snippets/Cwise_sign.cpp new file mode 100644 index 000000000..49920e4f1 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_sign.cpp @@ -0,0 +1,2 @@ +Array3d v(-3,5,0); +cout << v.sign() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_sin.cpp b/thirdparty/eigen/doc/snippets/Cwise_sin.cpp new file mode 100644 index 000000000..46fa908cb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_sin.cpp @@ -0,0 +1,2 @@ +Array3d v(M_PI, M_PI/2, M_PI/3); +cout << v.sin() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_sinh.cpp b/thirdparty/eigen/doc/snippets/Cwise_sinh.cpp 
new file mode 100644 index 000000000..fac9b19a8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_sinh.cpp @@ -0,0 +1,2 @@ +ArrayXd v = ArrayXd::LinSpaced(5,0,1); +cout << sinh(v) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_slash_equal.cpp b/thirdparty/eigen/doc/snippets/Cwise_slash_equal.cpp new file mode 100644 index 000000000..2efd32d84 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_slash_equal.cpp @@ -0,0 +1,3 @@ +Array3d v(3,2,4), w(5,4,2); +v /= w; +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_sqrt.cpp b/thirdparty/eigen/doc/snippets/Cwise_sqrt.cpp new file mode 100644 index 000000000..97bafe8b3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_sqrt.cpp @@ -0,0 +1,2 @@ +Array3d v(1,2,4); +cout << v.sqrt() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_square.cpp b/thirdparty/eigen/doc/snippets/Cwise_square.cpp new file mode 100644 index 000000000..f704c5e0b --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_square.cpp @@ -0,0 +1,2 @@ +Array3d v(2,3,4); +cout << v.square() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_tan.cpp b/thirdparty/eigen/doc/snippets/Cwise_tan.cpp new file mode 100644 index 000000000..b758ef04a --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_tan.cpp @@ -0,0 +1,2 @@ +Array3d v(M_PI, M_PI/2, M_PI/3); +cout << v.tan() << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_tanh.cpp b/thirdparty/eigen/doc/snippets/Cwise_tanh.cpp new file mode 100644 index 000000000..30cd0450d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_tanh.cpp @@ -0,0 +1,2 @@ +ArrayXd v = ArrayXd::LinSpaced(5,0,1); +cout << tanh(v) << endl; diff --git a/thirdparty/eigen/doc/snippets/Cwise_times_equal.cpp b/thirdparty/eigen/doc/snippets/Cwise_times_equal.cpp new file mode 100644 index 000000000..147556c73 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Cwise_times_equal.cpp @@ -0,0 +1,3 @@ +Array3d v(1,2,3), w(2,3,0); +v *= w; +cout << v << endl; diff --git 
a/thirdparty/eigen/doc/snippets/DenseBase_LinSpaced.cpp b/thirdparty/eigen/doc/snippets/DenseBase_LinSpaced.cpp new file mode 100644 index 000000000..8e54b17fc --- /dev/null +++ b/thirdparty/eigen/doc/snippets/DenseBase_LinSpaced.cpp @@ -0,0 +1,2 @@ +cout << VectorXi::LinSpaced(4,7,10).transpose() << endl; +cout << VectorXd::LinSpaced(5,0.0,1.0).transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/DenseBase_LinSpacedInt.cpp b/thirdparty/eigen/doc/snippets/DenseBase_LinSpacedInt.cpp new file mode 100644 index 000000000..0d7ae068e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/DenseBase_LinSpacedInt.cpp @@ -0,0 +1,8 @@ +cout << "Even spacing inputs:" << endl; +cout << VectorXi::LinSpaced(8,1,4).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,8).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,15).transpose() << endl; +cout << "Uneven spacing inputs:" << endl; +cout << VectorXi::LinSpaced(8,1,7).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,9).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,16).transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/DenseBase_LinSpaced_seq.cpp b/thirdparty/eigen/doc/snippets/DenseBase_LinSpaced_seq.cpp new file mode 100644 index 000000000..f55c5085d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/DenseBase_LinSpaced_seq.cpp @@ -0,0 +1,2 @@ +cout << VectorXi::LinSpaced(Sequential,4,7,10).transpose() << endl; +cout << VectorXd::LinSpaced(Sequential,5,0.0,1.0).transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/DenseBase_setLinSpaced.cpp b/thirdparty/eigen/doc/snippets/DenseBase_setLinSpaced.cpp new file mode 100644 index 000000000..46054f234 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/DenseBase_setLinSpaced.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setLinSpaced(5,0.5f,1.5f); +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/DirectionWise_hnormalized.cpp b/thirdparty/eigen/doc/snippets/DirectionWise_hnormalized.cpp new file mode 100644 index 000000000..3410790a8 
--- /dev/null +++ b/thirdparty/eigen/doc/snippets/DirectionWise_hnormalized.cpp @@ -0,0 +1,7 @@ +typedef Matrix Matrix4Xd; +Matrix4Xd M = Matrix4Xd::Random(4,5); +Projective3d P(Matrix4d::Random()); +cout << "The matrix M is:" << endl << M << endl << endl; +cout << "M.colwise().hnormalized():" << endl << M.colwise().hnormalized() << endl << endl; +cout << "P*M:" << endl << P*M << endl << endl; +cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/DirectionWise_replicate.cpp b/thirdparty/eigen/doc/snippets/DirectionWise_replicate.cpp new file mode 100644 index 000000000..d92d4a350 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/DirectionWise_replicate.cpp @@ -0,0 +1,4 @@ +MatrixXi m = MatrixXi::Random(2,3); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "m.colwise().replicate<3>() = ..." << endl; +cout << m.colwise().replicate<3>() << endl; diff --git a/thirdparty/eigen/doc/snippets/DirectionWise_replicate_int.cpp b/thirdparty/eigen/doc/snippets/DirectionWise_replicate_int.cpp new file mode 100644 index 000000000..f9b1b5355 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/DirectionWise_replicate_int.cpp @@ -0,0 +1,4 @@ +Vector3i v = Vector3i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "v.rowwise().replicate(5) = ..." 
<< endl; +cout << v.rowwise().replicate(5) << endl; diff --git a/thirdparty/eigen/doc/snippets/EigenSolver_EigenSolver_MatrixType.cpp b/thirdparty/eigen/doc/snippets/EigenSolver_EigenSolver_MatrixType.cpp new file mode 100644 index 000000000..c1d9fa879 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/EigenSolver_EigenSolver_MatrixType.cpp @@ -0,0 +1,16 @@ +MatrixXd A = MatrixXd::Random(6,6); +cout << "Here is a random 6x6 matrix, A:" << endl << A << endl << endl; + +EigenSolver es(A); +cout << "The eigenvalues of A are:" << endl << es.eigenvalues() << endl; +cout << "The matrix of eigenvectors, V, is:" << endl << es.eigenvectors() << endl << endl; + +complex lambda = es.eigenvalues()[0]; +cout << "Consider the first eigenvalue, lambda = " << lambda << endl; +VectorXcd v = es.eigenvectors().col(0); +cout << "If v is the corresponding eigenvector, then lambda * v = " << endl << lambda * v << endl; +cout << "... and A * v = " << endl << A.cast >() * v << endl << endl; + +MatrixXcd D = es.eigenvalues().asDiagonal(); +MatrixXcd V = es.eigenvectors(); +cout << "Finally, V * D * V^(-1) = " << endl << V * D * V.inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/EigenSolver_compute.cpp b/thirdparty/eigen/doc/snippets/EigenSolver_compute.cpp new file mode 100644 index 000000000..a5c96e9b4 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/EigenSolver_compute.cpp @@ -0,0 +1,6 @@ +EigenSolver es; +MatrixXf A = MatrixXf::Random(4,4); +es.compute(A, /* computeEigenvectors = */ false); +cout << "The eigenvalues of A are: " << es.eigenvalues().transpose() << endl; +es.compute(A + MatrixXf::Identity(4,4), false); // re-use es to compute eigenvalues of A+I +cout << "The eigenvalues of A+I are: " << es.eigenvalues().transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/EigenSolver_eigenvalues.cpp b/thirdparty/eigen/doc/snippets/EigenSolver_eigenvalues.cpp new file mode 100644 index 000000000..ed28869a0 --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/EigenSolver_eigenvalues.cpp @@ -0,0 +1,4 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +EigenSolver es(ones, false); +cout << "The eigenvalues of the 3x3 matrix of ones are:" + << endl << es.eigenvalues() << endl; diff --git a/thirdparty/eigen/doc/snippets/EigenSolver_eigenvectors.cpp b/thirdparty/eigen/doc/snippets/EigenSolver_eigenvectors.cpp new file mode 100644 index 000000000..8355f76c9 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/EigenSolver_eigenvectors.cpp @@ -0,0 +1,4 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +EigenSolver es(ones); +cout << "The first eigenvector of the 3x3 matrix of ones is:" + << endl << es.eigenvectors().col(0) << endl; diff --git a/thirdparty/eigen/doc/snippets/EigenSolver_pseudoEigenvectors.cpp b/thirdparty/eigen/doc/snippets/EigenSolver_pseudoEigenvectors.cpp new file mode 100644 index 000000000..85e2569df --- /dev/null +++ b/thirdparty/eigen/doc/snippets/EigenSolver_pseudoEigenvectors.cpp @@ -0,0 +1,9 @@ +MatrixXd A = MatrixXd::Random(6,6); +cout << "Here is a random 6x6 matrix, A:" << endl << A << endl << endl; + +EigenSolver es(A); +MatrixXd D = es.pseudoEigenvalueMatrix(); +MatrixXd V = es.pseudoEigenvectors(); +cout << "The pseudo-eigenvalue matrix D is:" << endl << D << endl; +cout << "The pseudo-eigenvector matrix V is:" << endl << V << endl; +cout << "Finally, V * D * V^(-1) = " << endl << V * D * V.inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/FullPivHouseholderQR_solve.cpp b/thirdparty/eigen/doc/snippets/FullPivHouseholderQR_solve.cpp new file mode 100644 index 000000000..23bc0749d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/FullPivHouseholderQR_solve.cpp @@ -0,0 +1,8 @@ +Matrix3f m = Matrix3f::Random(); +Matrix3f y = Matrix3f::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the matrix y:" << endl << y << endl; +Matrix3f x; +x = m.fullPivHouseholderQr().solve(y); +assert(y.isApprox(m*x)); +cout << "Here is a solution x to the equation 
mx=y:" << endl << x << endl; diff --git a/thirdparty/eigen/doc/snippets/FullPivLU_image.cpp b/thirdparty/eigen/doc/snippets/FullPivLU_image.cpp new file mode 100644 index 000000000..817bc1e2d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/FullPivLU_image.cpp @@ -0,0 +1,9 @@ +Matrix3d m; +m << 1,1,0, + 1,3,2, + 0,1,1; +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Notice that the middle column is the sum of the two others, so the " + << "columns are linearly dependent." << endl; +cout << "Here is a matrix whose columns have the same span but are linearly independent:" + << endl << m.fullPivLu().image(m) << endl; diff --git a/thirdparty/eigen/doc/snippets/FullPivLU_kernel.cpp b/thirdparty/eigen/doc/snippets/FullPivLU_kernel.cpp new file mode 100644 index 000000000..7086e01e2 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/FullPivLU_kernel.cpp @@ -0,0 +1,7 @@ +MatrixXf m = MatrixXf::Random(3,5); +cout << "Here is the matrix m:" << endl << m << endl; +MatrixXf ker = m.fullPivLu().kernel(); +cout << "Here is a matrix whose columns form a basis of the kernel of m:" + << endl << ker << endl; +cout << "By definition of the kernel, m*ker is zero:" + << endl << m*ker << endl; diff --git a/thirdparty/eigen/doc/snippets/FullPivLU_solve.cpp b/thirdparty/eigen/doc/snippets/FullPivLU_solve.cpp new file mode 100644 index 000000000..c1f88235e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/FullPivLU_solve.cpp @@ -0,0 +1,11 @@ +Matrix m = Matrix::Random(); +Matrix2f y = Matrix2f::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the matrix y:" << endl << y << endl; +Matrix x = m.fullPivLu().solve(y); +if((m*x).isApprox(y)) +{ + cout << "Here is a solution x to the equation mx=y:" << endl << x << endl; +} +else + cout << "The equation mx=y does not have any solution." 
<< endl; diff --git a/thirdparty/eigen/doc/snippets/GeneralizedEigenSolver.cpp b/thirdparty/eigen/doc/snippets/GeneralizedEigenSolver.cpp new file mode 100644 index 000000000..2acda45fa --- /dev/null +++ b/thirdparty/eigen/doc/snippets/GeneralizedEigenSolver.cpp @@ -0,0 +1,7 @@ +GeneralizedEigenSolver ges; +MatrixXf A = MatrixXf::Random(4,4); +MatrixXf B = MatrixXf::Random(4,4); +ges.compute(A, B); +cout << "The (complex) numerators of the generalzied eigenvalues are: " << ges.alphas().transpose() << endl; +cout << "The (real) denominatore of the generalzied eigenvalues are: " << ges.betas().transpose() << endl; +cout << "The (complex) generalzied eigenvalues are (alphas./beta): " << ges.eigenvalues().transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/HessenbergDecomposition_compute.cpp b/thirdparty/eigen/doc/snippets/HessenbergDecomposition_compute.cpp new file mode 100644 index 000000000..50e37833a --- /dev/null +++ b/thirdparty/eigen/doc/snippets/HessenbergDecomposition_compute.cpp @@ -0,0 +1,6 @@ +MatrixXcf A = MatrixXcf::Random(4,4); +HessenbergDecomposition hd(4); +hd.compute(A); +cout << "The matrix H in the decomposition of A is:" << endl << hd.matrixH() << endl; +hd.compute(2*A); // re-use hd to compute and store decomposition of 2A +cout << "The matrix H in the decomposition of 2A is:" << endl << hd.matrixH() << endl; diff --git a/thirdparty/eigen/doc/snippets/HessenbergDecomposition_matrixH.cpp b/thirdparty/eigen/doc/snippets/HessenbergDecomposition_matrixH.cpp new file mode 100644 index 000000000..af0136668 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/HessenbergDecomposition_matrixH.cpp @@ -0,0 +1,8 @@ +Matrix4f A = MatrixXf::Random(4,4); +cout << "Here is a random 4x4 matrix:" << endl << A << endl; +HessenbergDecomposition hessOfA(A); +MatrixXf H = hessOfA.matrixH(); +cout << "The Hessenberg matrix H is:" << endl << H << endl; +MatrixXf Q = hessOfA.matrixQ(); +cout << "The orthogonal matrix Q is:" << endl << Q << endl; +cout << "Q H 
Q^T is:" << endl << Q * H * Q.transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/HessenbergDecomposition_packedMatrix.cpp b/thirdparty/eigen/doc/snippets/HessenbergDecomposition_packedMatrix.cpp new file mode 100644 index 000000000..4fa5957e8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/HessenbergDecomposition_packedMatrix.cpp @@ -0,0 +1,9 @@ +Matrix4d A = Matrix4d::Random(4,4); +cout << "Here is a random 4x4 matrix:" << endl << A << endl; +HessenbergDecomposition hessOfA(A); +Matrix4d pm = hessOfA.packedMatrix(); +cout << "The packed matrix M is:" << endl << pm << endl; +cout << "The upper Hessenberg part corresponds to the matrix H, which is:" + << endl << hessOfA.matrixH() << endl; +Vector3d hc = hessOfA.householderCoefficients(); +cout << "The vector of Householder coefficients is:" << endl << hc << endl; diff --git a/thirdparty/eigen/doc/snippets/HouseholderQR_householderQ.cpp b/thirdparty/eigen/doc/snippets/HouseholderQR_householderQ.cpp new file mode 100644 index 000000000..e859ce55b --- /dev/null +++ b/thirdparty/eigen/doc/snippets/HouseholderQR_householderQ.cpp @@ -0,0 +1,7 @@ +MatrixXf A(MatrixXf::Random(5,3)), thinQ(MatrixXf::Identity(5,3)), Q; +A.setRandom(); +HouseholderQR qr(A); +Q = qr.householderQ(); +thinQ = qr.householderQ() * thinQ; +std::cout << "The complete unitary matrix Q is:\n" << Q << "\n\n"; +std::cout << "The thin matrix Q is:\n" << thinQ << "\n\n"; diff --git a/thirdparty/eigen/doc/snippets/HouseholderQR_solve.cpp b/thirdparty/eigen/doc/snippets/HouseholderQR_solve.cpp new file mode 100644 index 000000000..8cce6ce6c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/HouseholderQR_solve.cpp @@ -0,0 +1,9 @@ +typedef Matrix Matrix3x3; +Matrix3x3 m = Matrix3x3::Random(); +Matrix3f y = Matrix3f::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the matrix y:" << endl << y << endl; +Matrix3f x; +x = m.householderQr().solve(y); +assert(y.isApprox(m*x)); +cout << "Here is a solution x to the 
equation mx=y:" << endl << x << endl; diff --git a/thirdparty/eigen/doc/snippets/HouseholderSequence_HouseholderSequence.cpp b/thirdparty/eigen/doc/snippets/HouseholderSequence_HouseholderSequence.cpp new file mode 100644 index 000000000..2632b83b9 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/HouseholderSequence_HouseholderSequence.cpp @@ -0,0 +1,31 @@ +Matrix3d v = Matrix3d::Random(); +cout << "The matrix v is:" << endl; +cout << v << endl; + +Vector3d v0(1, v(1,0), v(2,0)); +cout << "The first Householder vector is: v_0 = " << v0.transpose() << endl; +Vector3d v1(0, 1, v(2,1)); +cout << "The second Householder vector is: v_1 = " << v1.transpose() << endl; +Vector3d v2(0, 0, 1); +cout << "The third Householder vector is: v_2 = " << v2.transpose() << endl; + +Vector3d h = Vector3d::Random(); +cout << "The Householder coefficients are: h = " << h.transpose() << endl; + +Matrix3d H0 = Matrix3d::Identity() - h(0) * v0 * v0.adjoint(); +cout << "The first Householder reflection is represented by H_0 = " << endl; +cout << H0 << endl; +Matrix3d H1 = Matrix3d::Identity() - h(1) * v1 * v1.adjoint(); +cout << "The second Householder reflection is represented by H_1 = " << endl; +cout << H1 << endl; +Matrix3d H2 = Matrix3d::Identity() - h(2) * v2 * v2.adjoint(); +cout << "The third Householder reflection is represented by H_2 = " << endl; +cout << H2 << endl; +cout << "Their product is H_0 H_1 H_2 = " << endl; +cout << H0 * H1 * H2 << endl; + +HouseholderSequence hhSeq(v, h); +Matrix3d hhSeqAsMatrix(hhSeq); +cout << "If we construct a HouseholderSequence from v and h" << endl; +cout << "and convert it to a matrix, we get:" << endl; +cout << hhSeqAsMatrix << endl; diff --git a/thirdparty/eigen/doc/snippets/IOFormat.cpp b/thirdparty/eigen/doc/snippets/IOFormat.cpp new file mode 100644 index 000000000..735f5dd85 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/IOFormat.cpp @@ -0,0 +1,14 @@ +std::string sep = "\n----------------------------------------\n"; +Matrix3d m1; +m1 
<< 1.111111, 2, 3.33333, 4, 5, 6, 7, 8.888888, 9; + +IOFormat CommaInitFmt(StreamPrecision, DontAlignCols, ", ", ", ", "", "", " << ", ";"); +IOFormat CleanFmt(4, 0, ", ", "\n", "[", "]"); +IOFormat OctaveFmt(StreamPrecision, 0, ", ", ";\n", "", "", "[", "]"); +IOFormat HeavyFmt(FullPrecision, 0, ", ", ";\n", "[", "]", "[", "]"); + +std::cout << m1 << sep; +std::cout << m1.format(CommaInitFmt) << sep; +std::cout << m1.format(CleanFmt) << sep; +std::cout << m1.format(OctaveFmt) << sep; +std::cout << m1.format(HeavyFmt) << sep; diff --git a/thirdparty/eigen/doc/snippets/JacobiSVD_basic.cpp b/thirdparty/eigen/doc/snippets/JacobiSVD_basic.cpp new file mode 100644 index 000000000..ab24b9bca --- /dev/null +++ b/thirdparty/eigen/doc/snippets/JacobiSVD_basic.cpp @@ -0,0 +1,9 @@ +MatrixXf m = MatrixXf::Random(3,2); +cout << "Here is the matrix m:" << endl << m << endl; +JacobiSVD svd(m, ComputeThinU | ComputeThinV); +cout << "Its singular values are:" << endl << svd.singularValues() << endl; +cout << "Its left singular vectors are the columns of the thin U matrix:" << endl << svd.matrixU() << endl; +cout << "Its right singular vectors are the columns of the thin V matrix:" << endl << svd.matrixV() << endl; +Vector3f rhs(1, 0, 0); +cout << "Now consider this rhs vector:" << endl << rhs << endl; +cout << "A least-squares solution of m*x = rhs is:" << endl << svd.solve(rhs) << endl; diff --git a/thirdparty/eigen/doc/snippets/Jacobi_makeGivens.cpp b/thirdparty/eigen/doc/snippets/Jacobi_makeGivens.cpp new file mode 100644 index 000000000..4b733c306 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Jacobi_makeGivens.cpp @@ -0,0 +1,6 @@ +Vector2f v = Vector2f::Random(); +JacobiRotation G; +G.makeGivens(v.x(), v.y()); +cout << "Here is the vector v:" << endl << v << endl; +v.applyOnTheLeft(0, 1, G.adjoint()); +cout << "Here is the vector J' * v:" << endl << v << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Jacobi_makeJacobi.cpp 
b/thirdparty/eigen/doc/snippets/Jacobi_makeJacobi.cpp new file mode 100644 index 000000000..0cc331d9f --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Jacobi_makeJacobi.cpp @@ -0,0 +1,8 @@ +Matrix2f m = Matrix2f::Random(); +m = (m + m.adjoint()).eval(); +JacobiRotation J; +J.makeJacobi(m, 0, 1); +cout << "Here is the matrix m:" << endl << m << endl; +m.applyOnTheLeft(0, 1, J.adjoint()); +m.applyOnTheRight(0, 1, J); +cout << "Here is the matrix J' * m * J:" << endl << m << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/LLT_example.cpp b/thirdparty/eigen/doc/snippets/LLT_example.cpp new file mode 100644 index 000000000..46fb40704 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/LLT_example.cpp @@ -0,0 +1,12 @@ +MatrixXd A(3,3); +A << 4,-1,2, -1,6,0, 2,0,5; +cout << "The matrix A is" << endl << A << endl; + +LLT lltOfA(A); // compute the Cholesky decomposition of A +MatrixXd L = lltOfA.matrixL(); // retrieve factor L in the decomposition +// The previous two lines can also be written as "L = A.llt().matrixL()" + +cout << "The Cholesky factor L is" << endl << L << endl; +cout << "To check this, let us compute L * L.transpose()" << endl; +cout << L * L.transpose() << endl; +cout << "This should equal the matrix A" << endl; diff --git a/thirdparty/eigen/doc/snippets/LLT_solve.cpp b/thirdparty/eigen/doc/snippets/LLT_solve.cpp new file mode 100644 index 000000000..7095d2cc3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/LLT_solve.cpp @@ -0,0 +1,8 @@ +typedef Matrix DataMatrix; +// let's generate some samples on the 3D plane of equation z = 2x+3y (with some noise) +DataMatrix samples = DataMatrix::Random(12,2); +VectorXf elevations = 2*samples.col(0) + 3*samples.col(1) + VectorXf::Random(12)*0.1; +// and let's solve samples * [x y]^T = elevations in least square sense: +Matrix xy + = (samples.adjoint() * samples).llt().solve((samples.adjoint()*elevations)); +cout << xy << endl; diff --git 
a/thirdparty/eigen/doc/snippets/LeastSquaresNormalEquations.cpp b/thirdparty/eigen/doc/snippets/LeastSquaresNormalEquations.cpp new file mode 100644 index 000000000..997cf1715 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/LeastSquaresNormalEquations.cpp @@ -0,0 +1,4 @@ +MatrixXf A = MatrixXf::Random(3, 2); +VectorXf b = VectorXf::Random(3); +cout << "The solution using normal equations is:\n" + << (A.transpose() * A).ldlt().solve(A.transpose() * b) << endl; diff --git a/thirdparty/eigen/doc/snippets/LeastSquaresQR.cpp b/thirdparty/eigen/doc/snippets/LeastSquaresQR.cpp new file mode 100644 index 000000000..6c9704547 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/LeastSquaresQR.cpp @@ -0,0 +1,4 @@ +MatrixXf A = MatrixXf::Random(3, 2); +VectorXf b = VectorXf::Random(3); +cout << "The solution using the QR decomposition is:\n" + << A.colPivHouseholderQr().solve(b) << endl; diff --git a/thirdparty/eigen/doc/snippets/Map_general_stride.cpp b/thirdparty/eigen/doc/snippets/Map_general_stride.cpp new file mode 100644 index 000000000..0657e7f84 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Map_general_stride.cpp @@ -0,0 +1,5 @@ +int array[24]; +for(int i = 0; i < 24; ++i) array[i] = i; +cout << Map > + (array, 3, 3, Stride(8, 2)) + << endl; diff --git a/thirdparty/eigen/doc/snippets/Map_inner_stride.cpp b/thirdparty/eigen/doc/snippets/Map_inner_stride.cpp new file mode 100644 index 000000000..d95ae9b3e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Map_inner_stride.cpp @@ -0,0 +1,5 @@ +int array[12]; +for(int i = 0; i < 12; ++i) array[i] = i; +cout << Map > + (array, 6) // the inner stride has already been passed as template parameter + << endl; diff --git a/thirdparty/eigen/doc/snippets/Map_outer_stride.cpp b/thirdparty/eigen/doc/snippets/Map_outer_stride.cpp new file mode 100644 index 000000000..2f6f052c3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Map_outer_stride.cpp @@ -0,0 +1,3 @@ +int array[12]; +for(int i = 0; i < 12; ++i) array[i] = i; +cout << 
Map >(array, 3, 3, OuterStride<>(4)) << endl; diff --git a/thirdparty/eigen/doc/snippets/Map_placement_new.cpp b/thirdparty/eigen/doc/snippets/Map_placement_new.cpp new file mode 100644 index 000000000..2e40eca32 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Map_placement_new.cpp @@ -0,0 +1,5 @@ +int data[] = {1,2,3,4,5,6,7,8,9}; +Map v(data,4); +cout << "The mapped vector v is: " << v << "\n"; +new (&v) Map(data+4,5); +cout << "Now v is: " << v << "\n"; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Map_simple.cpp b/thirdparty/eigen/doc/snippets/Map_simple.cpp new file mode 100644 index 000000000..423bb52ad --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Map_simple.cpp @@ -0,0 +1,3 @@ +int array[9]; +for(int i = 0; i < 9; ++i) array[i] = i; +cout << Map(array) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_adjoint.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_adjoint.cpp new file mode 100644 index 000000000..4680d5938 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_adjoint.cpp @@ -0,0 +1,3 @@ +Matrix2cf m = Matrix2cf::Random(); +cout << "Here is the 2x2 complex matrix m:" << endl << m << endl; +cout << "Here is the adjoint of m:" << endl << m.adjoint() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_all.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_all.cpp new file mode 100644 index 000000000..46f26f189 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_all.cpp @@ -0,0 +1,7 @@ +Vector3f boxMin(Vector3f::Zero()), boxMax(Vector3f::Ones()); +Vector3f p0 = Vector3f::Random(), p1 = Vector3f::Random().cwiseAbs(); +// let's check if p0 and p1 are inside the axis aligned box defined by the corners boxMin,boxMax: +cout << "Is (" << p0.transpose() << ") inside the box: " + << ((boxMin.array()p0.array()).all()) << endl; +cout << "Is (" << p1.transpose() << ") inside the box: " + << ((boxMin.array()p1.array()).all()) << endl; diff --git 
a/thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheLeft.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheLeft.cpp new file mode 100644 index 000000000..6398c873a --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheLeft.cpp @@ -0,0 +1,7 @@ +Matrix3f A = Matrix3f::Random(3,3), B; +B << 0,1,0, + 0,0,1, + 1,0,0; +cout << "At start, A = " << endl << A << endl; +A.applyOnTheLeft(B); +cout << "After applyOnTheLeft, A = " << endl << A << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheRight.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheRight.cpp new file mode 100644 index 000000000..e4b71b2d8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_applyOnTheRight.cpp @@ -0,0 +1,9 @@ +Matrix3f A = Matrix3f::Random(3,3), B; +B << 0,1,0, + 0,0,1, + 1,0,0; +cout << "At start, A = " << endl << A << endl; +A *= B; +cout << "After A *= B, A = " << endl << A << endl; +A.applyOnTheRight(B); // equivalent to A *= B +cout << "After applyOnTheRight, A = " << endl << A << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_array.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_array.cpp new file mode 100644 index 000000000..f215086db --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_array.cpp @@ -0,0 +1,4 @@ +Vector3d v(1,2,3); +v.array() += 3; +v.array() -= 2; +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_array_const.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_array_const.cpp new file mode 100644 index 000000000..cd3b26a7c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_array_const.cpp @@ -0,0 +1,4 @@ +Vector3d v(-1,2,-3); +cout << "the absolute values:" << endl << v.array().abs() << endl; +cout << "the absolute values plus one:" << endl << v.array().abs()+1 << endl; +cout << "sum of the squares: " << v.array().square().sum() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_asDiagonal.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_asDiagonal.cpp 
new file mode 100644 index 000000000..b01082db1 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_asDiagonal.cpp @@ -0,0 +1 @@ +cout << Matrix3i(Vector3i(2,5,6).asDiagonal()) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_block_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_block_int_int.cpp new file mode 100644 index 000000000..f99b6d4ca --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_block_int_int.cpp @@ -0,0 +1,5 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.block<2,2>(1,1):" << endl << m.block<2,2>(1,1) << endl; +m.block<2,2>(1,1).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_block_int_int_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_block_int_int_int_int.cpp new file mode 100644 index 000000000..7238cbbed --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_block_int_int_int_int.cpp @@ -0,0 +1,5 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.block(1, 1, 2, 2):" << endl << m.block(1, 1, 2, 2) << endl; +m.block(1, 1, 2, 2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_bottomLeftCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_bottomLeftCorner_int_int.cpp new file mode 100644 index 000000000..ebae95e1d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_bottomLeftCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.bottomLeftCorner(2, 2):" << endl; +cout << m.bottomLeftCorner(2, 2) << endl; +m.bottomLeftCorner(2, 2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_bottomRightCorner_int_int.cpp 
b/thirdparty/eigen/doc/snippets/MatrixBase_bottomRightCorner_int_int.cpp new file mode 100644 index 000000000..bf05093af --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_bottomRightCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.bottomRightCorner(2, 2):" << endl; +cout << m.bottomRightCorner(2, 2) << endl; +m.bottomRightCorner(2, 2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_bottomRows_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_bottomRows_int.cpp new file mode 100644 index 000000000..47ca92ec3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_bottomRows_int.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.bottomRows(2):" << endl; +cout << a.bottomRows(2) << endl; +a.bottomRows(2).setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cast.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cast.cpp new file mode 100644 index 000000000..016880b40 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cast.cpp @@ -0,0 +1,3 @@ +Matrix2d md = Matrix2d::Identity() * 0.45; +Matrix2f mf = Matrix2f::Identity(); +cout << md + mf.cast() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_col.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_col.cpp new file mode 100644 index 000000000..87c91b129 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_col.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Identity(); +m.col(1) = Vector3d(4,5,6); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_colwise.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_colwise.cpp new file mode 100644 index 000000000..a048beffa --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_colwise.cpp @@ -0,0 +1,5 @@ +Matrix3d m 
= Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the sum of each column:" << endl << m.colwise().sum() << endl; +cout << "Here is the maximum absolute value of each column:" + << endl << m.cwiseAbs().colwise().maxCoeff() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_computeInverseAndDetWithCheck.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_computeInverseAndDetWithCheck.cpp new file mode 100644 index 000000000..a7b084fd0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_computeInverseAndDetWithCheck.cpp @@ -0,0 +1,13 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +Matrix3d inverse; +bool invertible; +double determinant; +m.computeInverseAndDetWithCheck(inverse,determinant,invertible); +cout << "Its determinant is " << determinant << endl; +if(invertible) { + cout << "It is invertible, and its inverse is:" << endl << inverse << endl; +} +else { + cout << "It is not invertible." << endl; +} diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_computeInverseWithCheck.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_computeInverseWithCheck.cpp new file mode 100644 index 000000000..873a9f870 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_computeInverseWithCheck.cpp @@ -0,0 +1,11 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +Matrix3d inverse; +bool invertible; +m.computeInverseWithCheck(inverse,invertible); +if(invertible) { + cout << "It is invertible, and its inverse is:" << endl << inverse << endl; +} +else { + cout << "It is not invertible." 
<< endl; +} diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs.cpp new file mode 100644 index 000000000..28a31600f --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs.cpp @@ -0,0 +1,4 @@ +MatrixXd m(2,3); +m << 2, -4, 6, + -5, 1, 0; +cout << m.cwiseAbs() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs2.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs2.cpp new file mode 100644 index 000000000..889a2e2ba --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseAbs2.cpp @@ -0,0 +1,4 @@ +MatrixXd m(2,3); +m << 2, -4, 6, + -5, 1, 0; +cout << m.cwiseAbs2() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseEqual.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseEqual.cpp new file mode 100644 index 000000000..eb3656f4c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseEqual.cpp @@ -0,0 +1,7 @@ +MatrixXi m(2,2); +m << 1, 0, + 1, 1; +cout << "Comparing m with identity matrix:" << endl; +cout << m.cwiseEqual(MatrixXi::Identity(2,2)) << endl; +int count = m.cwiseEqual(MatrixXi::Identity(2,2)).count(); +cout << "Number of coefficients that are equal: " << count << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseInverse.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseInverse.cpp new file mode 100644 index 000000000..23e08f7b9 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseInverse.cpp @@ -0,0 +1,4 @@ +MatrixXd m(2,3); +m << 2, 0.5, 1, + 3, 0.25, 1; +cout << m.cwiseInverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseMax.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseMax.cpp new file mode 100644 index 000000000..3c956818b --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseMax.cpp @@ -0,0 +1,2 @@ +Vector3d v(2,3,4), w(4,2,3); +cout << v.cwiseMax(w) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseMin.cpp 
b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseMin.cpp new file mode 100644 index 000000000..82fc761e2 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseMin.cpp @@ -0,0 +1,2 @@ +Vector3d v(2,3,4), w(4,2,3); +cout << v.cwiseMin(w) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseNotEqual.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseNotEqual.cpp new file mode 100644 index 000000000..6a2e4fb6c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseNotEqual.cpp @@ -0,0 +1,7 @@ +MatrixXi m(2,2); +m << 1, 0, + 1, 1; +cout << "Comparing m with identity matrix:" << endl; +cout << m.cwiseNotEqual(MatrixXi::Identity(2,2)) << endl; +int count = m.cwiseNotEqual(MatrixXi::Identity(2,2)).count(); +cout << "Number of coefficients that are not equal: " << count << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseProduct.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseProduct.cpp new file mode 100644 index 000000000..1db3a1132 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseProduct.cpp @@ -0,0 +1,4 @@ +Matrix3i a = Matrix3i::Random(), b = Matrix3i::Random(); +Matrix3i c = a.cwiseProduct(b); +cout << "a:\n" << a << "\nb:\n" << b << "\nc:\n" << c << endl; + diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseQuotient.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseQuotient.cpp new file mode 100644 index 000000000..969121208 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseQuotient.cpp @@ -0,0 +1,2 @@ +Vector3d v(2,3,4), w(4,2,3); +cout << v.cwiseQuotient(w) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseSign.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseSign.cpp new file mode 100644 index 000000000..efd717955 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseSign.cpp @@ -0,0 +1,4 @@ +MatrixXd m(2,3); +m << 2, -4, 6, + -5, 1, 0; +cout << m.cwiseSign() << endl; diff --git 
a/thirdparty/eigen/doc/snippets/MatrixBase_cwiseSqrt.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseSqrt.cpp new file mode 100644 index 000000000..4bfd75d50 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_cwiseSqrt.cpp @@ -0,0 +1,2 @@ +Vector3d v(1,2,4); +cout << v.cwiseSqrt() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_diagonal.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_diagonal.cpp new file mode 100644 index 000000000..cd63413f3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_diagonal.cpp @@ -0,0 +1,4 @@ +Matrix3i m = Matrix3i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here are the coefficients on the main diagonal of m:" << endl + << m.diagonal() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_diagonal_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_diagonal_int.cpp new file mode 100644 index 000000000..7b66abf67 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_diagonal_int.cpp @@ -0,0 +1,5 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here are the coefficients on the 1st super-diagonal and 2nd sub-diagonal of m:" << endl + << m.diagonal(1).transpose() << endl + << m.diagonal(-2).transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_diagonal_template_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_diagonal_template_int.cpp new file mode 100644 index 000000000..0e73d1c16 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_diagonal_template_int.cpp @@ -0,0 +1,5 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here are the coefficients on the 1st super-diagonal and 2nd sub-diagonal of m:" << endl + << m.diagonal<1>().transpose() << endl + << m.diagonal<-2>().transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_eigenvalues.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_eigenvalues.cpp new 
file mode 100644 index 000000000..039f88701 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_eigenvalues.cpp @@ -0,0 +1,3 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +VectorXcd eivals = ones.eigenvalues(); +cout << "The eigenvalues of the 3x3 matrix of ones are:" << endl << eivals << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_end_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_end_int.cpp new file mode 100644 index 000000000..03c54a931 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_end_int.cpp @@ -0,0 +1,5 @@ +RowVector4i v = RowVector4i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "Here is v.tail(2):" << endl << v.tail(2) << endl; +v.tail(2).setZero(); +cout << "Now the vector v is:" << endl << v << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_eval.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_eval.cpp new file mode 100644 index 000000000..1df3aa01d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_eval.cpp @@ -0,0 +1,12 @@ +Matrix2f M = Matrix2f::Random(); +Matrix2f m; +m = M; +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Now we want to copy a column into a row." << endl; +cout << "If we do m.col(1) = m.row(0), then m becomes:" << endl; +m.col(1) = m.row(0); +cout << m << endl << "which is wrong!" << endl; +cout << "Now let us instead do m.col(1) = m.row(0).eval(). Then m becomes" << endl; +m = M; +m.col(1) = m.row(0).eval(); +cout << m << endl << "which is right." 
<< endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_fixedBlock_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_fixedBlock_int_int.cpp new file mode 100644 index 000000000..320112748 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_fixedBlock_int_int.cpp @@ -0,0 +1,5 @@ +Matrix4d m = Vector4d(1,2,3,4).asDiagonal(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.fixed<2, 2>(2, 2):" << endl << m.block<2, 2>(2, 2) << endl; +m.block<2, 2>(2, 0) = m.block<2, 2>(2, 2); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_hnormalized.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_hnormalized.cpp new file mode 100644 index 000000000..652cd77c0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_hnormalized.cpp @@ -0,0 +1,6 @@ +Vector4d v = Vector4d::Random(); +Projective3d P(Matrix4d::Random()); +cout << "v = " << v.transpose() << "]^T" << endl; +cout << "v.hnormalized() = " << v.hnormalized().transpose() << "]^T" << endl; +cout << "P*v = " << (P*v).transpose() << "]^T" << endl; +cout << "(P*v).hnormalized() = " << (P*v).hnormalized().transpose() << "]^T" << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_homogeneous.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_homogeneous.cpp new file mode 100644 index 000000000..457c28f91 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_homogeneous.cpp @@ -0,0 +1,6 @@ +Vector3d v = Vector3d::Random(), w; +Projective3d P(Matrix4d::Random()); +cout << "v = [" << v.transpose() << "]^T" << endl; +cout << "h.homogeneous() = [" << v.homogeneous().transpose() << "]^T" << endl; +cout << "(P * v.homogeneous()) = [" << (P * v.homogeneous()).transpose() << "]^T" << endl; +cout << "(P * v.homogeneous()).hnormalized() = [" << (P * v.homogeneous()).eval().hnormalized().transpose() << "]^T" << endl; \ No newline at end of file diff --git 
a/thirdparty/eigen/doc/snippets/MatrixBase_identity.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_identity.cpp new file mode 100644 index 000000000..b5c1e59c9 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_identity.cpp @@ -0,0 +1 @@ +cout << Matrix::Identity() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_identity_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_identity_int_int.cpp new file mode 100644 index 000000000..918649d64 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_identity_int_int.cpp @@ -0,0 +1 @@ +cout << MatrixXd::Identity(4, 3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_inverse.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_inverse.cpp new file mode 100644 index 000000000..a56142ee0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_inverse.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Its inverse is:" << endl << m.inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_isDiagonal.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_isDiagonal.cpp new file mode 100644 index 000000000..5b1d59977 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_isDiagonal.cpp @@ -0,0 +1,6 @@ +Matrix3d m = 10000 * Matrix3d::Identity(); +m(0,2) = 1; +cout << "Here's the matrix m:" << endl << m << endl; +cout << "m.isDiagonal() returns: " << m.isDiagonal() << endl; +cout << "m.isDiagonal(1e-3) returns: " << m.isDiagonal(1e-3) << endl; + diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_isIdentity.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_isIdentity.cpp new file mode 100644 index 000000000..17b756c97 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_isIdentity.cpp @@ -0,0 +1,5 @@ +Matrix3d m = Matrix3d::Identity(); +m(0,2) = 1e-4; +cout << "Here's the matrix m:" << endl << m << endl; +cout << "m.isIdentity() returns: " << m.isIdentity() << endl; +cout << "m.isIdentity(1e-3) 
returns: " << m.isIdentity(1e-3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_isOnes.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_isOnes.cpp new file mode 100644 index 000000000..f82f62809 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_isOnes.cpp @@ -0,0 +1,5 @@ +Matrix3d m = Matrix3d::Ones(); +m(0,2) += 1e-4; +cout << "Here's the matrix m:" << endl << m << endl; +cout << "m.isOnes() returns: " << m.isOnes() << endl; +cout << "m.isOnes(1e-3) returns: " << m.isOnes(1e-3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_isOrthogonal.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_isOrthogonal.cpp new file mode 100644 index 000000000..b22af066c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_isOrthogonal.cpp @@ -0,0 +1,6 @@ +Vector3d v(1,0,0); +Vector3d w(1e-4,0,1); +cout << "Here's the vector v:" << endl << v << endl; +cout << "Here's the vector w:" << endl << w << endl; +cout << "v.isOrthogonal(w) returns: " << v.isOrthogonal(w) << endl; +cout << "v.isOrthogonal(w,1e-3) returns: " << v.isOrthogonal(w,1e-3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_isUnitary.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_isUnitary.cpp new file mode 100644 index 000000000..3877da347 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_isUnitary.cpp @@ -0,0 +1,5 @@ +Matrix3d m = Matrix3d::Identity(); +m(0,2) = 1e-4; +cout << "Here's the matrix m:" << endl << m << endl; +cout << "m.isUnitary() returns: " << m.isUnitary() << endl; +cout << "m.isUnitary(1e-3) returns: " << m.isUnitary(1e-3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_isZero.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_isZero.cpp new file mode 100644 index 000000000..c2cfe2201 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_isZero.cpp @@ -0,0 +1,5 @@ +Matrix3d m = Matrix3d::Zero(); +m(0,2) = 1e-4; +cout << "Here's the matrix m:" << endl << m << endl; +cout << "m.isZero() returns: " << m.isZero() << 
endl; +cout << "m.isZero(1e-3) returns: " << m.isZero(1e-3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_leftCols_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_leftCols_int.cpp new file mode 100644 index 000000000..6ea984e4e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_leftCols_int.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.leftCols(2):" << endl; +cout << a.leftCols(2) << endl; +a.leftCols(2).setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_noalias.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_noalias.cpp new file mode 100644 index 000000000..3b54a79a6 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_noalias.cpp @@ -0,0 +1,3 @@ +Matrix2d a, b, c; a << 1,2,3,4; b << 5,6,7,8; +c.noalias() = a * b; // this computes the product directly to c +cout << c << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_ones.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_ones.cpp new file mode 100644 index 000000000..02c767c95 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_ones.cpp @@ -0,0 +1,2 @@ +cout << Matrix2d::Ones() << endl; +cout << 6 * RowVector4i::Ones() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_ones_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_ones_int.cpp new file mode 100644 index 000000000..2ef188e7d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_ones_int.cpp @@ -0,0 +1,2 @@ +cout << 6 * RowVectorXi::Ones(4) << endl; +cout << VectorXf::Ones(2) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_ones_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_ones_int_int.cpp new file mode 100644 index 000000000..60f5a31eb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_ones_int_int.cpp @@ -0,0 +1 @@ +cout << MatrixXi::Ones(2,3) << endl; diff --git 
a/thirdparty/eigen/doc/snippets/MatrixBase_operatorNorm.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_operatorNorm.cpp new file mode 100644 index 000000000..355246f0d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_operatorNorm.cpp @@ -0,0 +1,3 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +cout << "The operator norm of the 3x3 matrix of ones is " + << ones.operatorNorm() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_prod.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_prod.cpp new file mode 100644 index 000000000..d2f27bdc3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_prod.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the product of all the coefficients:" << endl << m.prod() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_random.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_random.cpp new file mode 100644 index 000000000..65fc524f1 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_random.cpp @@ -0,0 +1 @@ +cout << 100 * Matrix2i::Random() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_random_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_random_int.cpp new file mode 100644 index 000000000..f161d03c2 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_random_int.cpp @@ -0,0 +1 @@ +cout << VectorXi::Random(2) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_random_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_random_int_int.cpp new file mode 100644 index 000000000..3f0f7dd5d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_random_int_int.cpp @@ -0,0 +1 @@ +cout << MatrixXi::Random(2,3) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_replicate.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_replicate.cpp new file mode 100644 index 000000000..3ce52bcd5 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_replicate.cpp @@ -0,0 +1,4 @@ 
+MatrixXi m = MatrixXi::Random(2,3); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "m.replicate<3,2>() = ..." << endl; +cout << m.replicate<3,2>() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_replicate_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_replicate_int_int.cpp new file mode 100644 index 000000000..b1dbc70bc --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_replicate_int_int.cpp @@ -0,0 +1,4 @@ +Vector3i v = Vector3i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "v.replicate(2,5) = ..." << endl; +cout << v.replicate(2,5) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_reverse.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_reverse.cpp new file mode 100644 index 000000000..f545a2837 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_reverse.cpp @@ -0,0 +1,8 @@ +MatrixXi m = MatrixXi::Random(3,4); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the reverse of m:" << endl << m.reverse() << endl; +cout << "Here is the coefficient (1,0) in the reverse of m:" << endl + << m.reverse()(1,0) << endl; +cout << "Let us overwrite this coefficient with the value 4." 
<< endl; +m.reverse()(1,0) = 4; +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_rightCols_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_rightCols_int.cpp new file mode 100644 index 000000000..cb513401b --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_rightCols_int.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.rightCols(2):" << endl; +cout << a.rightCols(2) << endl; +a.rightCols(2).setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_row.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_row.cpp new file mode 100644 index 000000000..b15e6260c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_row.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Identity(); +m.row(1) = Vector3d(4,5,6); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_rowwise.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_rowwise.cpp new file mode 100644 index 000000000..ae93964ea --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_rowwise.cpp @@ -0,0 +1,5 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the sum of each row:" << endl << m.rowwise().sum() << endl; +cout << "Here is the maximum absolute value of each row:" + << endl << m.cwiseAbs().rowwise().maxCoeff() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_segment_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_segment_int_int.cpp new file mode 100644 index 000000000..70cd6d266 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_segment_int_int.cpp @@ -0,0 +1,5 @@ +RowVector4i v = RowVector4i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "Here is v.segment(1, 2):" << endl << v.segment(1, 2) << endl; +v.segment(1, 2).setZero(); +cout << "Now the vector v is:" << endl << v 
<< endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_select.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_select.cpp new file mode 100644 index 000000000..ae5477f02 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_select.cpp @@ -0,0 +1,6 @@ +MatrixXi m(3, 3); +m << 1, 2, 3, + 4, 5, 6, + 7, 8, 9; +m = (m.array() >= 5).select(-m, m); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_set.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_set.cpp new file mode 100644 index 000000000..50ecf5fb9 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_set.cpp @@ -0,0 +1,13 @@ +Matrix3i m1; +m1 << 1, 2, 3, + 4, 5, 6, + 7, 8, 9; +cout << m1 << endl << endl; +Matrix3i m2 = Matrix3i::Identity(); +m2.block(0,0, 2,2) << 10, 11, 12, 13; +cout << m2 << endl << endl; +Vector2i v1; +v1 << 14, 15; +m2 << v1.transpose(), 16, + v1, m1.block(1,1,2,2); +cout << m2 << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_setIdentity.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_setIdentity.cpp new file mode 100644 index 000000000..4fd0aa24a --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_setIdentity.cpp @@ -0,0 +1,3 @@ +Matrix4i m = Matrix4i::Zero(); +m.block<3,3>(1,0).setIdentity(); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_setOnes.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_setOnes.cpp new file mode 100644 index 000000000..4cef9c1eb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_setOnes.cpp @@ -0,0 +1,3 @@ +Matrix4i m = Matrix4i::Random(); +m.row(1).setOnes(); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_setRandom.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_setRandom.cpp new file mode 100644 index 000000000..e2c257d44 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_setRandom.cpp @@ -0,0 +1,3 @@ +Matrix4i m = Matrix4i::Zero(); +m.col(1).setRandom(); +cout << m << endl; diff --git 
a/thirdparty/eigen/doc/snippets/MatrixBase_setZero.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_setZero.cpp new file mode 100644 index 000000000..9b5b9583c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_setZero.cpp @@ -0,0 +1,3 @@ +Matrix4i m = Matrix4i::Random(); +m.row(1).setZero(); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_start_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_start_int.cpp new file mode 100644 index 000000000..c261d2b4e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_start_int.cpp @@ -0,0 +1,5 @@ +RowVector4i v = RowVector4i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "Here is v.head(2):" << endl << v.head(2) << endl; +v.head(2).setZero(); +cout << "Now the vector v is:" << endl << v << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_bottomRows.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_bottomRows.cpp new file mode 100644 index 000000000..f9ea892da --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_bottomRows.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.bottomRows<2>():" << endl; +cout << a.bottomRows<2>() << endl; +a.bottomRows<2>().setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_end.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_end.cpp new file mode 100644 index 000000000..f5ccb00f6 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_end.cpp @@ -0,0 +1,5 @@ +RowVector4i v = RowVector4i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "Here is v.tail(2):" << endl << v.tail<2>() << endl; +v.tail<2>().setZero(); +cout << "Now the vector v is:" << endl << v << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_block_int_int_int_int.cpp 
b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_block_int_int_int_int.cpp new file mode 100644 index 000000000..4dced03ba --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_block_int_int_int_int.cpp @@ -0,0 +1,5 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the block:" << endl << m.block<2, Dynamic>(1, 1, 2, 3) << endl; +m.block<2, Dynamic>(1, 1, 2, 3).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner.cpp new file mode 100644 index 000000000..847892a27 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.bottomLeftCorner<2,2>():" << endl; +cout << m.bottomLeftCorner<2,2>() << endl; +m.bottomLeftCorner<2,2>().setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp new file mode 100644 index 000000000..a1edcc808 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomLeftCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.bottomLeftCorner<2,Dynamic>(2,2):" << endl; +cout << m.bottomLeftCorner<2,Dynamic>(2,2) << endl; +m.bottomLeftCorner<2,Dynamic>(2,2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner.cpp new file mode 
100644 index 000000000..abacb014e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.bottomRightCorner<2,2>():" << endl; +cout << m.bottomRightCorner<2,2>() << endl; +m.bottomRightCorner<2,2>().setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner_int_int.cpp new file mode 100644 index 000000000..a65508fd8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_bottomRightCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.bottomRightCorner<2,Dynamic>(2,2):" << endl; +cout << m.bottomRightCorner<2,Dynamic>(2,2) << endl; +m.bottomRightCorner<2,Dynamic>(2,2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner.cpp new file mode 100644 index 000000000..1899d902d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.topLeftCorner<2,2>():" << endl; +cout << m.topLeftCorner<2,2>() << endl; +m.topLeftCorner<2,2>().setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner_int_int.cpp new file mode 100644 index 000000000..fac761f63 --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topLeftCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.topLeftCorner<2,Dynamic>(2,2):" << endl; +cout << m.topLeftCorner<2,Dynamic>(2,2) << endl; +m.topLeftCorner<2,Dynamic>(2,2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner.cpp new file mode 100644 index 000000000..c3a177110 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.topRightCorner<2,2>():" << endl; +cout << m.topRightCorner<2,2>() << endl; +m.topRightCorner<2,2>().setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner_int_int.cpp new file mode 100644 index 000000000..a17acc004 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_int_topRightCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.topRightCorner<2,Dynamic>(2,2):" << endl; +cout << m.topRightCorner<2,Dynamic>(2,2) << endl; +m.topRightCorner<2,Dynamic>(2,2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_leftCols.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_leftCols.cpp new file mode 100644 index 000000000..1c425d917 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_leftCols.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout 
<< "Here is the array a:" << endl << a << endl; +cout << "Here is a.leftCols<2>():" << endl; +cout << a.leftCols<2>() << endl; +a.leftCols<2>().setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_rightCols.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_rightCols.cpp new file mode 100644 index 000000000..fc8c0d93c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_rightCols.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.rightCols<2>():" << endl; +cout << a.rightCols<2>() << endl; +a.rightCols<2>().setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_segment.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_segment.cpp new file mode 100644 index 000000000..e448b4022 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_segment.cpp @@ -0,0 +1,5 @@ +RowVector4i v = RowVector4i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "Here is v.segment<2>(1):" << endl << v.segment<2>(1) << endl; +v.segment<2>(2).setZero(); +cout << "Now the vector v is:" << endl << v << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_start.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_start.cpp new file mode 100644 index 000000000..d336b3716 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_start.cpp @@ -0,0 +1,5 @@ +RowVector4i v = RowVector4i::Random(); +cout << "Here is the vector v:" << endl << v << endl; +cout << "Here is v.head(2):" << endl << v.head<2>() << endl; +v.head<2>().setZero(); +cout << "Now the vector v is:" << endl << v << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_template_int_topRows.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_topRows.cpp new file mode 
100644 index 000000000..0110251a5 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_template_int_topRows.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.topRows<2>():" << endl; +cout << a.topRows<2>() << endl; +a.topRows<2>().setZero(); +cout << "Now the array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_topLeftCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_topLeftCorner_int_int.cpp new file mode 100644 index 000000000..e52cb3bdb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_topLeftCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.topLeftCorner(2, 2):" << endl; +cout << m.topLeftCorner(2, 2) << endl; +m.topLeftCorner(2, 2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_topRightCorner_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_topRightCorner_int_int.cpp new file mode 100644 index 000000000..811fa563e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_topRightCorner_int_int.cpp @@ -0,0 +1,6 @@ +Matrix4i m = Matrix4i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is m.topRightCorner(2, 2):" << endl; +cout << m.topRightCorner(2, 2) << endl; +m.topRightCorner(2, 2).setZero(); +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_topRows_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_topRows_int.cpp new file mode 100644 index 000000000..f2d75f1cb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_topRows_int.cpp @@ -0,0 +1,6 @@ +Array44i a = Array44i::Random(); +cout << "Here is the array a:" << endl << a << endl; +cout << "Here is a.topRows(2):" << endl; +cout << a.topRows(2) << endl; +a.topRows(2).setZero(); +cout << "Now the 
array a is:" << endl << a << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_transpose.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_transpose.cpp new file mode 100644 index 000000000..88eea83c4 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_transpose.cpp @@ -0,0 +1,8 @@ +Matrix2i m = Matrix2i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the transpose of m:" << endl << m.transpose() << endl; +cout << "Here is the coefficient (1,0) in the transpose of m:" << endl + << m.transpose()(1,0) << endl; +cout << "Let us overwrite this coefficient with the value 0." << endl; +m.transpose()(1,0) = 0; +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_triangularView.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_triangularView.cpp new file mode 100644 index 000000000..03aa303f0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_triangularView.cpp @@ -0,0 +1,9 @@ +Matrix3i m = Matrix3i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the upper-triangular matrix extracted from m:" << endl + << Matrix3i(m.triangularView()) << endl; +cout << "Here is the strictly-upper-triangular matrix extracted from m:" << endl + << Matrix3i(m.triangularView()) << endl; +cout << "Here is the unit-lower-triangular matrix extracted from m:" << endl + << Matrix3i(m.triangularView()) << endl; +// FIXME need to implement output for triangularViews (Bug 885) diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_zero.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_zero.cpp new file mode 100644 index 000000000..606493677 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_zero.cpp @@ -0,0 +1,2 @@ +cout << Matrix2d::Zero() << endl; +cout << RowVector4i::Zero() << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_zero_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_zero_int.cpp new file mode 100644 index 
000000000..370a9ba0a --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_zero_int.cpp @@ -0,0 +1,2 @@ +cout << RowVectorXi::Zero(4) << endl; +cout << VectorXf::Zero(2) << endl; diff --git a/thirdparty/eigen/doc/snippets/MatrixBase_zero_int_int.cpp b/thirdparty/eigen/doc/snippets/MatrixBase_zero_int_int.cpp new file mode 100644 index 000000000..4099c5d4d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/MatrixBase_zero_int_int.cpp @@ -0,0 +1 @@ +cout << MatrixXi::Zero(2,3) << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_resize_NoChange_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_resize_NoChange_int.cpp new file mode 100644 index 000000000..acdf18c46 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_resize_NoChange_int.cpp @@ -0,0 +1,3 @@ +MatrixXd m(3,4); +m.resize(NoChange, 5); +cout << "m: " << m.rows() << " rows, " << m.cols() << " cols" << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_resize_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_resize_int.cpp new file mode 100644 index 000000000..044c78989 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_resize_int.cpp @@ -0,0 +1,6 @@ +VectorXd v(10); +v.resize(3); +RowVector3d w; +w.resize(3); // this is legal, but has no effect +cout << "v: " << v.rows() << " rows, " << v.cols() << " cols" << endl; +cout << "w: " << w.rows() << " rows, " << w.cols() << " cols" << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_resize_int_NoChange.cpp b/thirdparty/eigen/doc/snippets/Matrix_resize_int_NoChange.cpp new file mode 100644 index 000000000..5c37c9067 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_resize_int_NoChange.cpp @@ -0,0 +1,3 @@ +MatrixXd m(3,4); +m.resize(5, NoChange); +cout << "m: " << m.rows() << " rows, " << m.cols() << " cols" << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_resize_int_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_resize_int_int.cpp new file mode 100644 index 000000000..bfd474159 --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/Matrix_resize_int_int.cpp @@ -0,0 +1,9 @@ +MatrixXd m(2,3); +m << 1,2,3,4,5,6; +cout << "here's the 2x3 matrix m:" << endl << m << endl; +cout << "let's resize m to 3x2. This is a conservative resizing because 2*3==3*2." << endl; +m.resize(3,2); +cout << "here's the 3x2 matrix m:" << endl << m << endl; +cout << "now let's resize m to size 2x2. This is NOT a conservative resizing, so it becomes uninitialized:" << endl; +m.resize(2,2); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setConstant_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setConstant_int.cpp new file mode 100644 index 000000000..ff5a86c98 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setConstant_int.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setConstant(3, 5); +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setConstant_int_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setConstant_int_int.cpp new file mode 100644 index 000000000..32b950cfd --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setConstant_int_int.cpp @@ -0,0 +1,3 @@ +MatrixXf m; +m.setConstant(3, 3, 5); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setIdentity_int_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setIdentity_int_int.cpp new file mode 100644 index 000000000..a65967199 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setIdentity_int_int.cpp @@ -0,0 +1,3 @@ +MatrixXf m; +m.setIdentity(3, 3); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setOnes_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setOnes_int.cpp new file mode 100644 index 000000000..752cb35b2 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setOnes_int.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setOnes(3); +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setOnes_int_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setOnes_int_int.cpp new file mode 100644 index 000000000..1ffb66bbd --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/Matrix_setOnes_int_int.cpp @@ -0,0 +1,3 @@ +MatrixXf m; +m.setOnes(3, 3); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setRandom_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setRandom_int.cpp new file mode 100644 index 000000000..e160dd7df --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setRandom_int.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setRandom(3); +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setRandom_int_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setRandom_int_int.cpp new file mode 100644 index 000000000..80cda11d7 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setRandom_int_int.cpp @@ -0,0 +1,3 @@ +MatrixXf m; +m.setRandom(3, 3); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setZero_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setZero_int.cpp new file mode 100644 index 000000000..0fb16c1f3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setZero_int.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setZero(3); +cout << v << endl; diff --git a/thirdparty/eigen/doc/snippets/Matrix_setZero_int_int.cpp b/thirdparty/eigen/doc/snippets/Matrix_setZero_int_int.cpp new file mode 100644 index 000000000..ad883b916 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Matrix_setZero_int_int.cpp @@ -0,0 +1,3 @@ +MatrixXf m; +m.setZero(3, 3); +cout << m << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialPivLU_solve.cpp b/thirdparty/eigen/doc/snippets/PartialPivLU_solve.cpp new file mode 100644 index 000000000..fa3570ab8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialPivLU_solve.cpp @@ -0,0 +1,7 @@ +MatrixXd A = MatrixXd::Random(3,3); +MatrixXd B = MatrixXd::Random(3,2); +cout << "Here is the invertible matrix A:" << endl << A << endl; +cout << "Here is the matrix B:" << endl << B << endl; +MatrixXd X = A.lu().solve(B); +cout << "Here is the (unique) solution X to the equation AX=B:" << endl << X << endl; +cout << "Relative error: " << 
(A*X-B).norm() / B.norm() << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_count.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_count.cpp new file mode 100644 index 000000000..1c3b3a28f --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_count.cpp @@ -0,0 +1,5 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +Matrix res = (m.array() >= 0.5).rowwise().count(); +cout << "Here is the count of elements larger or equal than 0.5 of each row:" << endl; +cout << res << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_maxCoeff.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_maxCoeff.cpp new file mode 100644 index 000000000..e8fd3820d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_maxCoeff.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the maximum of each column:" << endl << m.colwise().maxCoeff() << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_minCoeff.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_minCoeff.cpp new file mode 100644 index 000000000..d717bc0d1 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_minCoeff.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the minimum of each column:" << endl << m.colwise().minCoeff() << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_norm.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_norm.cpp new file mode 100644 index 000000000..dbcf290a0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_norm.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the norm of each column:" << endl << m.colwise().norm() << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_prod.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_prod.cpp new file mode 
100644 index 000000000..aacf09cbb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_prod.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the product of each row:" << endl << m.rowwise().prod() << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_squaredNorm.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_squaredNorm.cpp new file mode 100644 index 000000000..9f3293e65 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_squaredNorm.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the square norm of each row:" << endl << m.rowwise().squaredNorm() << endl; diff --git a/thirdparty/eigen/doc/snippets/PartialRedux_sum.cpp b/thirdparty/eigen/doc/snippets/PartialRedux_sum.cpp new file mode 100644 index 000000000..ec82d3e41 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/PartialRedux_sum.cpp @@ -0,0 +1,3 @@ +Matrix3d m = Matrix3d::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the sum of each row:" << endl << m.rowwise().sum() << endl; diff --git a/thirdparty/eigen/doc/snippets/RealQZ_compute.cpp b/thirdparty/eigen/doc/snippets/RealQZ_compute.cpp new file mode 100644 index 000000000..a18da42e8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/RealQZ_compute.cpp @@ -0,0 +1,17 @@ +MatrixXf A = MatrixXf::Random(4,4); +MatrixXf B = MatrixXf::Random(4,4); +RealQZ qz(4); // preallocate space for 4x4 matrices +qz.compute(A,B); // A = Q S Z, B = Q T Z + +// print original matrices and result of decomposition +cout << "A:\n" << A << "\n" << "B:\n" << B << "\n"; +cout << "S:\n" << qz.matrixS() << "\n" << "T:\n" << qz.matrixT() << "\n"; +cout << "Q:\n" << qz.matrixQ() << "\n" << "Z:\n" << qz.matrixZ() << "\n"; + +// verify precision +cout << "\nErrors:" + << "\n|A-QSZ|: " << (A-qz.matrixQ()*qz.matrixS()*qz.matrixZ()).norm() + << ", |B-QTZ|: " << 
(B-qz.matrixQ()*qz.matrixT()*qz.matrixZ()).norm() + << "\n|QQ* - I|: " << (qz.matrixQ()*qz.matrixQ().adjoint() - MatrixXf::Identity(4,4)).norm() + << ", |ZZ* - I|: " << (qz.matrixZ()*qz.matrixZ().adjoint() - MatrixXf::Identity(4,4)).norm() + << "\n"; diff --git a/thirdparty/eigen/doc/snippets/RealSchur_RealSchur_MatrixType.cpp b/thirdparty/eigen/doc/snippets/RealSchur_RealSchur_MatrixType.cpp new file mode 100644 index 000000000..a5530dcc8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/RealSchur_RealSchur_MatrixType.cpp @@ -0,0 +1,10 @@ +MatrixXd A = MatrixXd::Random(6,6); +cout << "Here is a random 6x6 matrix, A:" << endl << A << endl << endl; + +RealSchur schur(A); +cout << "The orthogonal matrix U is:" << endl << schur.matrixU() << endl; +cout << "The quasi-triangular matrix T is:" << endl << schur.matrixT() << endl << endl; + +MatrixXd U = schur.matrixU(); +MatrixXd T = schur.matrixT(); +cout << "U * T * U^T = " << endl << U * T * U.transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/RealSchur_compute.cpp b/thirdparty/eigen/doc/snippets/RealSchur_compute.cpp new file mode 100644 index 000000000..20c2611b8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/RealSchur_compute.cpp @@ -0,0 +1,6 @@ +MatrixXf A = MatrixXf::Random(4,4); +RealSchur schur(4); +schur.compute(A, /* computeU = */ false); +cout << "The matrix T in the decomposition of A is:" << endl << schur.matrixT() << endl; +schur.compute(A.inverse(), /* computeU = */ false); +cout << "The matrix T in the decomposition of A^(-1) is:" << endl << schur.matrixT() << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp new file mode 100644 index 000000000..73a7f6252 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp @@ -0,0 +1,7 @@ +SelfAdjointEigenSolver es; +Matrix4f X = Matrix4f::Random(4,4); +Matrix4f A = X + 
X.transpose(); +es.compute(A); +cout << "The eigenvalues of A are: " << es.eigenvalues().transpose() << endl; +es.compute(A + Matrix4f::Identity(4,4)); // re-use es to compute eigenvalues of A+I +cout << "The eigenvalues of A+I are: " << es.eigenvalues().transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp new file mode 100644 index 000000000..3599b17a0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType.cpp @@ -0,0 +1,17 @@ +MatrixXd X = MatrixXd::Random(5,5); +MatrixXd A = X + X.transpose(); +cout << "Here is a random symmetric 5x5 matrix, A:" << endl << A << endl << endl; + +SelfAdjointEigenSolver es(A); +cout << "The eigenvalues of A are:" << endl << es.eigenvalues() << endl; +cout << "The matrix of eigenvectors, V, is:" << endl << es.eigenvectors() << endl << endl; + +double lambda = es.eigenvalues()[0]; +cout << "Consider the first eigenvalue, lambda = " << lambda << endl; +VectorXd v = es.eigenvectors().col(0); +cout << "If v is the corresponding eigenvector, then lambda * v = " << endl << lambda * v << endl; +cout << "... 
and A * v = " << endl << A * v << endl << endl; + +MatrixXd D = es.eigenvalues().asDiagonal(); +MatrixXd V = es.eigenvectors(); +cout << "Finally, V * D * V^(-1) = " << endl << V * D * V.inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp new file mode 100644 index 000000000..bbb821e02 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_SelfAdjointEigenSolver_MatrixType2.cpp @@ -0,0 +1,16 @@ +MatrixXd X = MatrixXd::Random(5,5); +MatrixXd A = X + X.transpose(); +cout << "Here is a random symmetric matrix, A:" << endl << A << endl; +X = MatrixXd::Random(5,5); +MatrixXd B = X * X.transpose(); +cout << "and a random postive-definite matrix, B:" << endl << B << endl << endl; + +GeneralizedSelfAdjointEigenSolver es(A,B); +cout << "The eigenvalues of the pencil (A,B) are:" << endl << es.eigenvalues() << endl; +cout << "The matrix of eigenvectors, V, is:" << endl << es.eigenvectors() << endl << endl; + +double lambda = es.eigenvalues()[0]; +cout << "Consider the first eigenvalue, lambda = " << lambda << endl; +VectorXd v = es.eigenvectors().col(0); +cout << "If v is the corresponding eigenvector, then A * v = " << endl << A * v << endl; +cout << "... 
and lambda * B * v = " << endl << lambda * B * v << endl << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType.cpp new file mode 100644 index 000000000..2975cc3f2 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType.cpp @@ -0,0 +1,7 @@ +SelfAdjointEigenSolver es(4); +MatrixXf X = MatrixXf::Random(4,4); +MatrixXf A = X + X.transpose(); +es.compute(A); +cout << "The eigenvalues of A are: " << es.eigenvalues().transpose() << endl; +es.compute(A + MatrixXf::Identity(4,4)); // re-use es to compute eigenvalues of A+I +cout << "The eigenvalues of A+I are: " << es.eigenvalues().transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType2.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType2.cpp new file mode 100644 index 000000000..07c92a1e4 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_compute_MatrixType2.cpp @@ -0,0 +1,9 @@ +MatrixXd X = MatrixXd::Random(5,5); +MatrixXd A = X * X.transpose(); +X = MatrixXd::Random(5,5); +MatrixXd B = X * X.transpose(); + +GeneralizedSelfAdjointEigenSolver es(A,B,EigenvaluesOnly); +cout << "The eigenvalues of the pencil (A,B) are:" << endl << es.eigenvalues() << endl; +es.compute(B,A,false); +cout << "The eigenvalues of the pencil (B,A) are:" << endl << es.eigenvalues() << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvalues.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvalues.cpp new file mode 100644 index 000000000..0ff33c68d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvalues.cpp @@ -0,0 +1,4 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +SelfAdjointEigenSolver es(ones); +cout << "The eigenvalues of the 3x3 matrix of ones are:" + << endl << es.eigenvalues() << endl; diff --git 
a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvectors.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvectors.cpp new file mode 100644 index 000000000..cfc8b0d54 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_eigenvectors.cpp @@ -0,0 +1,4 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +SelfAdjointEigenSolver es(ones); +cout << "The first eigenvector of the 3x3 matrix of ones is:" + << endl << es.eigenvectors().col(1) << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorInverseSqrt.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorInverseSqrt.cpp new file mode 100644 index 000000000..114c65fb3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorInverseSqrt.cpp @@ -0,0 +1,9 @@ +MatrixXd X = MatrixXd::Random(4,4); +MatrixXd A = X * X.transpose(); +cout << "Here is a random positive-definite matrix, A:" << endl << A << endl << endl; + +SelfAdjointEigenSolver es(A); +cout << "The inverse square root of A is: " << endl; +cout << es.operatorInverseSqrt() << endl; +cout << "We can also compute it with operatorSqrt() and inverse(). 
That yields: " << endl; +cout << es.operatorSqrt().inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorSqrt.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorSqrt.cpp new file mode 100644 index 000000000..eeacca74b --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointEigenSolver_operatorSqrt.cpp @@ -0,0 +1,8 @@ +MatrixXd X = MatrixXd::Random(4,4); +MatrixXd A = X * X.transpose(); +cout << "Here is a random positive-definite matrix, A:" << endl << A << endl << endl; + +SelfAdjointEigenSolver es(A); +MatrixXd sqrtA = es.operatorSqrt(); +cout << "The square root of A is: " << endl << sqrtA << endl; +cout << "If we square this, we get: " << endl << sqrtA*sqrtA << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointView_eigenvalues.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointView_eigenvalues.cpp new file mode 100644 index 000000000..be1986778 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointView_eigenvalues.cpp @@ -0,0 +1,3 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +VectorXd eivals = ones.selfadjointView().eigenvalues(); +cout << "The eigenvalues of the 3x3 matrix of ones are:" << endl << eivals << endl; diff --git a/thirdparty/eigen/doc/snippets/SelfAdjointView_operatorNorm.cpp b/thirdparty/eigen/doc/snippets/SelfAdjointView_operatorNorm.cpp new file mode 100644 index 000000000..f380f5594 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SelfAdjointView_operatorNorm.cpp @@ -0,0 +1,3 @@ +MatrixXd ones = MatrixXd::Ones(3,3); +cout << "The operator norm of the 3x3 matrix of ones is " + << ones.selfadjointView().operatorNorm() << endl; diff --git a/thirdparty/eigen/doc/snippets/SparseMatrix_coeffs.cpp b/thirdparty/eigen/doc/snippets/SparseMatrix_coeffs.cpp new file mode 100644 index 000000000..f71a69b07 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/SparseMatrix_coeffs.cpp @@ -0,0 +1,9 @@ +SparseMatrix A(3,3); +A.insert(1,2) = 0; +A.insert(0,1) = 1; +A.insert(2,0) = 2; 
+A.makeCompressed(); +cout << "The matrix A is:" << endl << MatrixXd(A) << endl; +cout << "it has " << A.nonZeros() << " stored non zero coefficients that are: " << A.coeffs().transpose() << endl; +A.coeffs() += 10; +cout << "After adding 10 to every stored non zero coefficient, the matrix A is:" << endl << MatrixXd(A) << endl; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_block.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_block.cpp new file mode 100644 index 000000000..03282f4f0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_block.cpp @@ -0,0 +1,7 @@ +MatrixXi mat(3,3); +mat << 1, 2, 3, 4, 5, 6, 7, 8, 9; +cout << "Here is the matrix mat:\n" << mat << endl; + +// This assignment shows the aliasing problem +mat.bottomRightCorner(2,2) = mat.topLeftCorner(2,2); +cout << "After the assignment, mat = \n" << mat << endl; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_block_correct.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_block_correct.cpp new file mode 100644 index 000000000..6fee5801e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_block_correct.cpp @@ -0,0 +1,7 @@ +MatrixXi mat(3,3); +mat << 1, 2, 3, 4, 5, 6, 7, 8, 9; +cout << "Here is the matrix mat:\n" << mat << endl; + +// The eval() solves the aliasing problem +mat.bottomRightCorner(2,2) = mat.topLeftCorner(2,2).eval(); +cout << "After the assignment, mat = \n" << mat << endl; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_cwise.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_cwise.cpp new file mode 100644 index 000000000..7049f6c56 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_cwise.cpp @@ -0,0 +1,20 @@ +MatrixXf mat(2,2); +mat << 1, 2, 4, 7; +cout << "Here is the matrix mat:\n" << mat << endl << endl; + +mat = 2 * mat; +cout << "After 'mat = 2 * mat', mat = \n" << mat << endl << endl; + + +mat = mat - MatrixXf::Identity(2,2); +cout << "After the subtraction, it becomes\n" << mat << endl << endl; + + +ArrayXXf arr = 
mat; +arr = arr.square(); +cout << "After squaring, it becomes\n" << arr << endl << endl; + +// Combining all operations in one statement: +mat << 1, 2, 4, 7; +mat = (2 * mat - MatrixXf::Identity(2,2)).array().square(); +cout << "Doing everything at once yields\n" << mat << endl << endl; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_mult1.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_mult1.cpp new file mode 100644 index 000000000..cd7e9004c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_mult1.cpp @@ -0,0 +1,4 @@ +MatrixXf matA(2,2); +matA << 2, 0, 0, 2; +matA = matA * matA; +cout << matA; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_mult2.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_mult2.cpp new file mode 100644 index 000000000..a3ff56851 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_mult2.cpp @@ -0,0 +1,10 @@ +MatrixXf matA(2,2), matB(2,2); +matA << 2, 0, 0, 2; + +// Simple but not quite as efficient +matB = matA * matA; +cout << matB << endl << endl; + +// More complicated but also more efficient +matB.noalias() = matA * matA; +cout << matB; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_mult3.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_mult3.cpp new file mode 100644 index 000000000..1d12a6c67 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_mult3.cpp @@ -0,0 +1,4 @@ +MatrixXf matA(2,2); +matA << 2, 0, 0, 2; +matA.noalias() = matA * matA; +cout << matA; diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_mult4.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_mult4.cpp new file mode 100644 index 000000000..8a8992f6c --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_mult4.cpp @@ -0,0 +1,5 @@ +MatrixXf A(2,2), B(3,2); +B << 2, 0, 0, 3, 1, 1; +A << 2, 0, 0, -2; +A = (B * A).cwiseAbs(); +cout << A; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/TopicAliasing_mult5.cpp b/thirdparty/eigen/doc/snippets/TopicAliasing_mult5.cpp new 
file mode 100644 index 000000000..1a36defde --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicAliasing_mult5.cpp @@ -0,0 +1,5 @@ +MatrixXf A(2,2), B(3,2); +B << 2, 0, 0, 3, 1, 1; +A << 2, 0, 0, -2; +A = (B * A).eval().cwiseAbs(); +cout << A; diff --git a/thirdparty/eigen/doc/snippets/TopicStorageOrders_example.cpp b/thirdparty/eigen/doc/snippets/TopicStorageOrders_example.cpp new file mode 100644 index 000000000..0623ef0c2 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/TopicStorageOrders_example.cpp @@ -0,0 +1,18 @@ +Matrix Acolmajor; +Acolmajor << 8, 2, 2, 9, + 9, 1, 4, 4, + 3, 5, 4, 5; +cout << "The matrix A:" << endl; +cout << Acolmajor << endl << endl; + +cout << "In memory (column-major):" << endl; +for (int i = 0; i < Acolmajor.size(); i++) + cout << *(Acolmajor.data() + i) << " "; +cout << endl << endl; + +Matrix Arowmajor = Acolmajor; +cout << "In memory (row-major):" << endl; +for (int i = 0; i < Arowmajor.size(); i++) + cout << *(Arowmajor.data() + i) << " "; +cout << endl; + diff --git a/thirdparty/eigen/doc/snippets/Triangular_solve.cpp b/thirdparty/eigen/doc/snippets/Triangular_solve.cpp new file mode 100644 index 000000000..548442467 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Triangular_solve.cpp @@ -0,0 +1,11 @@ +Matrix3d m = Matrix3d::Zero(); +m.triangularView().setOnes(); +cout << "Here is the matrix m:\n" << m << endl; +Matrix3d n = Matrix3d::Ones(); +n.triangularView() *= 2; +cout << "Here is the matrix n:\n" << n << endl; +cout << "And now here is m.inverse()*n, taking advantage of the fact that" + " m is upper-triangular:\n" + << m.triangularView().solve(n) << endl; +cout << "And this is n*m.inverse():\n" + << m.triangularView().solve(n); diff --git a/thirdparty/eigen/doc/snippets/Tridiagonalization_Tridiagonalization_MatrixType.cpp b/thirdparty/eigen/doc/snippets/Tridiagonalization_Tridiagonalization_MatrixType.cpp new file mode 100644 index 000000000..a26012433 --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/Tridiagonalization_Tridiagonalization_MatrixType.cpp @@ -0,0 +1,9 @@ +MatrixXd X = MatrixXd::Random(5,5); +MatrixXd A = X + X.transpose(); +cout << "Here is a random symmetric 5x5 matrix:" << endl << A << endl << endl; +Tridiagonalization triOfA(A); +MatrixXd Q = triOfA.matrixQ(); +cout << "The orthogonal matrix Q is:" << endl << Q << endl; +MatrixXd T = triOfA.matrixT(); +cout << "The tridiagonal matrix T is:" << endl << T << endl << endl; +cout << "Q * T * Q^T = " << endl << Q * T * Q.transpose() << endl; diff --git a/thirdparty/eigen/doc/snippets/Tridiagonalization_compute.cpp b/thirdparty/eigen/doc/snippets/Tridiagonalization_compute.cpp new file mode 100644 index 000000000..0062a99e8 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tridiagonalization_compute.cpp @@ -0,0 +1,9 @@ +Tridiagonalization tri; +MatrixXf X = MatrixXf::Random(4,4); +MatrixXf A = X + X.transpose(); +tri.compute(A); +cout << "The matrix T in the tridiagonal decomposition of A is: " << endl; +cout << tri.matrixT() << endl; +tri.compute(2*A); // re-use tri to compute eigenvalues of 2A +cout << "The matrix T in the tridiagonal decomposition of 2A is: " << endl; +cout << tri.matrixT() << endl; diff --git a/thirdparty/eigen/doc/snippets/Tridiagonalization_decomposeInPlace.cpp b/thirdparty/eigen/doc/snippets/Tridiagonalization_decomposeInPlace.cpp new file mode 100644 index 000000000..93dcfca1d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tridiagonalization_decomposeInPlace.cpp @@ -0,0 +1,10 @@ +MatrixXd X = MatrixXd::Random(5,5); +MatrixXd A = X + X.transpose(); +cout << "Here is a random symmetric 5x5 matrix:" << endl << A << endl << endl; + +VectorXd diag(5); +VectorXd subdiag(4); +internal::tridiagonalization_inplace(A, diag, subdiag, true); +cout << "The orthogonal matrix Q is:" << endl << A << endl; +cout << "The diagonal of the tridiagonal matrix T is:" << endl << diag << endl; +cout << "The subdiagonal of the tridiagonal matrix T is:" << endl << 
subdiag << endl; diff --git a/thirdparty/eigen/doc/snippets/Tridiagonalization_diagonal.cpp b/thirdparty/eigen/doc/snippets/Tridiagonalization_diagonal.cpp new file mode 100644 index 000000000..6eec82169 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tridiagonalization_diagonal.cpp @@ -0,0 +1,13 @@ +MatrixXcd X = MatrixXcd::Random(4,4); +MatrixXcd A = X + X.adjoint(); +cout << "Here is a random self-adjoint 4x4 matrix:" << endl << A << endl << endl; + +Tridiagonalization triOfA(A); +MatrixXd T = triOfA.matrixT(); +cout << "The tridiagonal matrix T is:" << endl << T << endl << endl; + +cout << "We can also extract the diagonals of T directly ..." << endl; +VectorXd diag = triOfA.diagonal(); +cout << "The diagonal is:" << endl << diag << endl; +VectorXd subdiag = triOfA.subDiagonal(); +cout << "The subdiagonal is:" << endl << subdiag << endl; diff --git a/thirdparty/eigen/doc/snippets/Tridiagonalization_householderCoefficients.cpp b/thirdparty/eigen/doc/snippets/Tridiagonalization_householderCoefficients.cpp new file mode 100644 index 000000000..e5d872880 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tridiagonalization_householderCoefficients.cpp @@ -0,0 +1,6 @@ +Matrix4d X = Matrix4d::Random(4,4); +Matrix4d A = X + X.transpose(); +cout << "Here is a random symmetric 4x4 matrix:" << endl << A << endl; +Tridiagonalization triOfA(A); +Vector3d hc = triOfA.householderCoefficients(); +cout << "The vector of Householder coefficients is:" << endl << hc << endl; diff --git a/thirdparty/eigen/doc/snippets/Tridiagonalization_packedMatrix.cpp b/thirdparty/eigen/doc/snippets/Tridiagonalization_packedMatrix.cpp new file mode 100644 index 000000000..0f55d0c28 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tridiagonalization_packedMatrix.cpp @@ -0,0 +1,8 @@ +Matrix4d X = Matrix4d::Random(4,4); +Matrix4d A = X + X.transpose(); +cout << "Here is a random symmetric 4x4 matrix:" << endl << A << endl; +Tridiagonalization triOfA(A); +Matrix4d pm = triOfA.packedMatrix(); +cout 
<< "The packed matrix M is:" << endl << pm << endl; +cout << "The diagonal and subdiagonal corresponds to the matrix T, which is:" + << endl << triOfA.matrixT() << endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Block.cpp b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Block.cpp new file mode 100644 index 000000000..96e40acfb --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Block.cpp @@ -0,0 +1,5 @@ +MatrixXf matA(2, 2); +matA << 1, 2, 3, 4; +MatrixXf matB(4, 4); +matB << matA, matA/10, matA/10, matA; +std::cout << matB << std::endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_CommaTemporary.cpp b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_CommaTemporary.cpp new file mode 100644 index 000000000..50cff4cb6 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_CommaTemporary.cpp @@ -0,0 +1,4 @@ +MatrixXf mat = MatrixXf::Random(2, 3); +std::cout << mat << std::endl << std::endl; +mat = (MatrixXf(2,2) << 0, 1, 1, 0).finished() * mat; +std::cout << mat << std::endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp new file mode 100644 index 000000000..55a21539d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp @@ -0,0 +1,11 @@ +RowVectorXd vec1(3); +vec1 << 1, 2, 3; +std::cout << "vec1 = " << vec1 << std::endl; + +RowVectorXd vec2(4); +vec2 << 1, 4, 9, 16; +std::cout << "vec2 = " << vec2 << std::endl; + +RowVectorXd joined(7); +joined << vec1, vec2; +std::cout << "joined = " << joined << std::endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_LinSpaced.cpp b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_LinSpaced.cpp new file mode 100644 index 000000000..c6a73ab8c --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_LinSpaced.cpp @@ -0,0 +1,7 @@ +ArrayXXf table(10, 4); +table.col(0) = ArrayXf::LinSpaced(10, 0, 90); +table.col(1) = M_PI / 180 * table.col(0); +table.col(2) = table.col(1).sin(); +table.col(3) = table.col(1).cos(); +std::cout << " Degrees Radians Sine Cosine\n"; +std::cout << table << std::endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_ThreeWays.cpp b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_ThreeWays.cpp new file mode 100644 index 000000000..cb7457652 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_ThreeWays.cpp @@ -0,0 +1,20 @@ +const int size = 6; +MatrixXd mat1(size, size); +mat1.topLeftCorner(size/2, size/2) = MatrixXd::Zero(size/2, size/2); +mat1.topRightCorner(size/2, size/2) = MatrixXd::Identity(size/2, size/2); +mat1.bottomLeftCorner(size/2, size/2) = MatrixXd::Identity(size/2, size/2); +mat1.bottomRightCorner(size/2, size/2) = MatrixXd::Zero(size/2, size/2); +std::cout << mat1 << std::endl << std::endl; + +MatrixXd mat2(size, size); +mat2.topLeftCorner(size/2, size/2).setZero(); +mat2.topRightCorner(size/2, size/2).setIdentity(); +mat2.bottomLeftCorner(size/2, size/2).setIdentity(); +mat2.bottomRightCorner(size/2, size/2).setZero(); +std::cout << mat2 << std::endl << std::endl; + +MatrixXd mat3(size, size); +mat3 << MatrixXd::Zero(size/2, size/2), MatrixXd::Identity(size/2, size/2), + MatrixXd::Identity(size/2, size/2), MatrixXd::Zero(size/2, size/2); +std::cout << mat3 << std::endl; + diff --git a/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Zero.cpp b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Zero.cpp new file mode 100644 index 000000000..76a36a319 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_AdvancedInitialization_Zero.cpp @@ -0,0 +1,13 @@ +std::cout << "A fixed-size array:\n"; +Array33f a1 = Array33f::Zero(); +std::cout << a1 << "\n\n"; + + 
+std::cout << "A one-dimensional dynamic-size array:\n"; +ArrayXf a2 = ArrayXf::Zero(3); +std::cout << a2 << "\n\n"; + + +std::cout << "A two-dimensional dynamic-size array:\n"; +ArrayXXf a3 = ArrayXXf::Zero(3, 4); +std::cout << a3 << "\n"; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_Map_rowmajor.cpp b/thirdparty/eigen/doc/snippets/Tutorial_Map_rowmajor.cpp new file mode 100644 index 000000000..fd45ace03 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_Map_rowmajor.cpp @@ -0,0 +1,7 @@ +int array[8]; +for(int i = 0; i < 8; ++i) array[i] = i; +cout << "Column-major:\n" << Map >(array) << endl; +cout << "Row-major:\n" << Map >(array) << endl; +cout << "Row-major using stride:\n" << + Map, Unaligned, Stride<1,4> >(array) << endl; + diff --git a/thirdparty/eigen/doc/snippets/Tutorial_Map_using.cpp b/thirdparty/eigen/doc/snippets/Tutorial_Map_using.cpp new file mode 100644 index 000000000..e5e499f1f --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_Map_using.cpp @@ -0,0 +1,21 @@ +typedef Matrix MatrixType; +typedef Map MapType; +typedef Map MapTypeConst; // a read-only map +const int n_dims = 5; + +MatrixType m1(n_dims), m2(n_dims); +m1.setRandom(); +m2.setRandom(); +float *p = &m2(0); // get the address storing the data for m2 +MapType m2map(p,m2.size()); // m2map shares data with m2 +MapTypeConst m2mapconst(p,m2.size()); // a read-only accessor for m2 + +cout << "m1: " << m1 << endl; +cout << "m2: " << m2 << endl; +cout << "Squared euclidean distance: " << (m1-m2).squaredNorm() << endl; +cout << "Squared euclidean distance, using map: " << + (m1-m2map).squaredNorm() << endl; +m2map(3) = 7; // this will change m2, since they share the same array +cout << "Updated m2: " << m2 << endl; +cout << "m2 coefficient 2, constant accessor: " << m2mapconst(2) << endl; +/* m2mapconst(2) = 5; */ // this yields a compile-time error diff --git a/thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Mat.cpp 
b/thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Mat.cpp new file mode 100644 index 000000000..f84d6e76d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Mat.cpp @@ -0,0 +1,6 @@ +MatrixXf M1(2,6); // Column-major storage +M1 << 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12; + +Map M2(M1.data(), 6,2); +cout << "M2:" << endl << M2 << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Vec.cpp b/thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Vec.cpp new file mode 100644 index 000000000..95bd4e0e6 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_ReshapeMat2Vec.cpp @@ -0,0 +1,11 @@ +MatrixXf M1(3,3); // Column-major storage +M1 << 1, 2, 3, + 4, 5, 6, + 7, 8, 9; + +Map v1(M1.data(), M1.size()); +cout << "v1:" << endl << v1 << endl; + +Matrix M2(M1); +Map v2(M2.data(), M2.size()); +cout << "v2:" << endl << v2 << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Tutorial_SlicingCol.cpp b/thirdparty/eigen/doc/snippets/Tutorial_SlicingCol.cpp new file mode 100644 index 000000000..f667ff689 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_SlicingCol.cpp @@ -0,0 +1,11 @@ +MatrixXf M1 = MatrixXf::Random(3,8); +cout << "Column major input:" << endl << M1 << "\n"; +Map > M2(M1.data(), M1.rows(), (M1.cols()+2)/3, OuterStride<>(M1.outerStride()*3)); +cout << "1 column over 3:" << endl << M2 << "\n"; + +typedef Matrix RowMajorMatrixXf; +RowMajorMatrixXf M3(M1); +cout << "Row major input:" << endl << M3 << "\n"; +Map > M4(M3.data(), M3.rows(), (M3.cols()+2)/3, + Stride(M3.outerStride(),3)); +cout << "1 column over 3:" << endl << M4 << "\n"; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Tutorial_SlicingVec.cpp b/thirdparty/eigen/doc/snippets/Tutorial_SlicingVec.cpp new file mode 100644 index 000000000..07e10bf69 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_SlicingVec.cpp @@ -0,0 +1,4 @@ +RowVectorXf v = RowVectorXf::LinSpaced(20,0,19); 
+cout << "Input:" << endl << v << endl; +Map > v2(v.data(), v.size()/2); +cout << "Even:" << v2 << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Tutorial_commainit_01.cpp b/thirdparty/eigen/doc/snippets/Tutorial_commainit_01.cpp new file mode 100644 index 000000000..47ba31dc9 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_commainit_01.cpp @@ -0,0 +1,5 @@ +Matrix3f m; +m << 1, 2, 3, + 4, 5, 6, + 7, 8, 9; +std::cout << m; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_commainit_01b.cpp b/thirdparty/eigen/doc/snippets/Tutorial_commainit_01b.cpp new file mode 100644 index 000000000..2adb2e213 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_commainit_01b.cpp @@ -0,0 +1,5 @@ +Matrix3f m; +m.row(0) << 1, 2, 3; +m.block(1,0,2,2) << 4, 5, 7, 8; +m.col(2).tail(2) << 6, 9; +std::cout << m; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_commainit_02.cpp b/thirdparty/eigen/doc/snippets/Tutorial_commainit_02.cpp new file mode 100644 index 000000000..c960d6ab5 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_commainit_02.cpp @@ -0,0 +1,7 @@ +int rows=5, cols=5; +MatrixXf m(rows,cols); +m << (Matrix3f() << 1, 2, 3, 4, 5, 6, 7, 8, 9).finished(), + MatrixXf::Zero(3,cols-3), + MatrixXf::Zero(rows-3,3), + MatrixXf::Identity(rows-3,cols-3); +cout << m; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_solve_matrix_inverse.cpp b/thirdparty/eigen/doc/snippets/Tutorial_solve_matrix_inverse.cpp new file mode 100644 index 000000000..fff324446 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_solve_matrix_inverse.cpp @@ -0,0 +1,6 @@ +Matrix3f A; +Vector3f b; +A << 1,2,3, 4,5,6, 7,8,10; +b << 3, 3, 4; +Vector3f x = A.inverse() * b; +cout << "The solution is:" << endl << x << endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_solve_multiple_rhs.cpp b/thirdparty/eigen/doc/snippets/Tutorial_solve_multiple_rhs.cpp new file mode 100644 index 000000000..5411a44ab --- /dev/null +++ 
b/thirdparty/eigen/doc/snippets/Tutorial_solve_multiple_rhs.cpp @@ -0,0 +1,10 @@ +Matrix3f A(3,3); +A << 1,2,3, 4,5,6, 7,8,10; +Matrix B; +B << 3,1, 3,1, 4,1; +Matrix X; +X = A.fullPivLu().solve(B); +cout << "The solution with right-hand side (3,3,4) is:" << endl; +cout << X.col(0) << endl; +cout << "The solution with right-hand side (1,1,1) is:" << endl; +cout << X.col(1) << endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_solve_reuse_decomposition.cpp b/thirdparty/eigen/doc/snippets/Tutorial_solve_reuse_decomposition.cpp new file mode 100644 index 000000000..3ca06453a --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_solve_reuse_decomposition.cpp @@ -0,0 +1,13 @@ +Matrix3f A(3,3); +A << 1,2,3, 4,5,6, 7,8,10; +PartialPivLU luOfA(A); // compute LU decomposition of A +Vector3f b; +b << 3,3,4; +Vector3f x; +x = luOfA.solve(b); +cout << "The solution with right-hand side (3,3,4) is:" << endl; +cout << x << endl; +b << 1,1,1; +x = luOfA.solve(b); +cout << "The solution with right-hand side (1,1,1) is:" << endl; +cout << x << endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_solve_singular.cpp b/thirdparty/eigen/doc/snippets/Tutorial_solve_singular.cpp new file mode 100644 index 000000000..abff1ef73 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_solve_singular.cpp @@ -0,0 +1,9 @@ +Matrix3f A; +Vector3f b; +A << 1,2,3, 4,5,6, 7,8,9; +b << 3, 3, 4; +cout << "Here is the matrix A:" << endl << A << endl; +cout << "Here is the vector b:" << endl << b << endl; +Vector3f x; +x = A.lu().solve(b); +cout << "The solution is:" << endl << x << endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_solve_triangular.cpp b/thirdparty/eigen/doc/snippets/Tutorial_solve_triangular.cpp new file mode 100644 index 000000000..9d13f22ec --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_solve_triangular.cpp @@ -0,0 +1,8 @@ +Matrix3f A; +Vector3f b; +A << 1,2,3, 0,5,6, 0,0,10; +b << 3, 3, 4; +cout << "Here is the matrix A:" << endl << A << endl; 
+cout << "Here is the vector b:" << endl << b << endl; +Vector3f x = A.triangularView().solve(b); +cout << "The solution is:" << endl << x << endl; diff --git a/thirdparty/eigen/doc/snippets/Tutorial_solve_triangular_inplace.cpp b/thirdparty/eigen/doc/snippets/Tutorial_solve_triangular_inplace.cpp new file mode 100644 index 000000000..16ae633a3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Tutorial_solve_triangular_inplace.cpp @@ -0,0 +1,6 @@ +Matrix3f A; +Vector3f b; +A << 1,2,3, 0,5,6, 0,0,10; +b << 3, 3, 4; +A.triangularView().solveInPlace(b); +cout << "The solution is:" << endl << b << endl; diff --git a/thirdparty/eigen/doc/snippets/VectorwiseOp_homogeneous.cpp b/thirdparty/eigen/doc/snippets/VectorwiseOp_homogeneous.cpp new file mode 100644 index 000000000..aba4fed0e --- /dev/null +++ b/thirdparty/eigen/doc/snippets/VectorwiseOp_homogeneous.cpp @@ -0,0 +1,7 @@ +typedef Matrix Matrix3Xd; +Matrix3Xd M = Matrix3Xd::Random(3,5); +Projective3d P(Matrix4d::Random()); +cout << "The matrix M is:" << endl << M << endl << endl; +cout << "M.colwise().homogeneous():" << endl << M.colwise().homogeneous() << endl << endl; +cout << "P * M.colwise().homogeneous():" << endl << P * M.colwise().homogeneous() << endl << endl; +cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/Vectorwise_reverse.cpp b/thirdparty/eigen/doc/snippets/Vectorwise_reverse.cpp new file mode 100644 index 000000000..2f6a35080 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/Vectorwise_reverse.cpp @@ -0,0 +1,10 @@ +MatrixXi m = MatrixXi::Random(3,4); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the rowwise reverse of m:" << endl << m.rowwise().reverse() << endl; +cout << "Here is the colwise reverse of m:" << endl << m.colwise().reverse() << endl; + +cout << "Here is the coefficient (1,0) in the rowise reverse of 
m:" << endl +<< m.rowwise().reverse()(1,0) << endl; +cout << "Let us overwrite this coefficient with the value 4." << endl; +//m.colwise().reverse()(1,0) = 4; +cout << "Now the matrix m is:" << endl << m << endl; diff --git a/thirdparty/eigen/doc/snippets/class_FullPivLU.cpp b/thirdparty/eigen/doc/snippets/class_FullPivLU.cpp new file mode 100644 index 000000000..fce7fac09 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/class_FullPivLU.cpp @@ -0,0 +1,16 @@ +typedef Matrix Matrix5x3; +typedef Matrix Matrix5x5; +Matrix5x3 m = Matrix5x3::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +Eigen::FullPivLU lu(m); +cout << "Here is, up to permutations, its LU decomposition matrix:" + << endl << lu.matrixLU() << endl; +cout << "Here is the L part:" << endl; +Matrix5x5 l = Matrix5x5::Identity(); +l.block<5,3>(0,0).triangularView() = lu.matrixLU(); +cout << l << endl; +cout << "Here is the U part:" << endl; +Matrix5x3 u = lu.matrixLU().triangularView(); +cout << u << endl; +cout << "Let us now reconstruct the original matrix m:" << endl; +cout << lu.permutationP().inverse() * l * u * lu.permutationQ().inverse() << endl; diff --git a/thirdparty/eigen/doc/snippets/compile_snippet.cpp.in b/thirdparty/eigen/doc/snippets/compile_snippet.cpp.in new file mode 100644 index 000000000..d63f371a3 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/compile_snippet.cpp.in @@ -0,0 +1,20 @@ +static bool eigen_did_assert = false; +#define eigen_assert(X) if(!eigen_did_assert && !(X)){ std::cout << "### Assertion raised in " << __FILE__ << ":" << __LINE__ << ":\n" #X << "\n### The following would happen without assertions:\n"; eigen_did_assert = true;} + +#include +#include + +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + + +using namespace Eigen; +using namespace std; + +int main(int, char**) +{ + cout.precision(3); + ${snippet_source_code} + return 0; +} diff --git a/thirdparty/eigen/doc/snippets/tut_arithmetic_redux_minmax.cpp 
b/thirdparty/eigen/doc/snippets/tut_arithmetic_redux_minmax.cpp new file mode 100644 index 000000000..f4ae7f406 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/tut_arithmetic_redux_minmax.cpp @@ -0,0 +1,12 @@ + Matrix3f m = Matrix3f::Random(); + std::ptrdiff_t i, j; + float minOfM = m.minCoeff(&i,&j); + cout << "Here is the matrix m:\n" << m << endl; + cout << "Its minimum coefficient (" << minOfM + << ") is at position (" << i << "," << j << ")\n\n"; + + RowVector4i v = RowVector4i::Random(); + int maxOfV = v.maxCoeff(&i); + cout << "Here is the vector v: " << v << endl; + cout << "Its maximum coefficient (" << maxOfV + << ") is at position " << i << endl; diff --git a/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_aliasing.cpp b/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_aliasing.cpp new file mode 100644 index 000000000..c8e4746d0 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_aliasing.cpp @@ -0,0 +1,5 @@ +Matrix2i a; a << 1, 2, 3, 4; +cout << "Here is the matrix a:\n" << a << endl; + +a = a.transpose(); // !!! do NOT do this !!! 
+cout << "and the result of the aliasing effect:\n" << a << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_conjugate.cpp b/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_conjugate.cpp new file mode 100644 index 000000000..88496b22d --- /dev/null +++ b/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_conjugate.cpp @@ -0,0 +1,12 @@ +MatrixXcf a = MatrixXcf::Random(2,2); +cout << "Here is the matrix a\n" << a << endl; + +cout << "Here is the matrix a^T\n" << a.transpose() << endl; + + +cout << "Here is the conjugate of a\n" << a.conjugate() << endl; + + +cout << "Here is the matrix a^*\n" << a.adjoint() << endl; + + diff --git a/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_inplace.cpp b/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_inplace.cpp new file mode 100644 index 000000000..7a069ff23 --- /dev/null +++ b/thirdparty/eigen/doc/snippets/tut_arithmetic_transpose_inplace.cpp @@ -0,0 +1,6 @@ +MatrixXf a(2,3); a << 1, 2, 3, 4, 5, 6; +cout << "Here is the initial matrix a:\n" << a << endl; + + +a.transposeInPlace(); +cout << "and after being transposed:\n" << a << endl; \ No newline at end of file diff --git a/thirdparty/eigen/doc/snippets/tut_matrix_assignment_resizing.cpp b/thirdparty/eigen/doc/snippets/tut_matrix_assignment_resizing.cpp new file mode 100644 index 000000000..cf189983f --- /dev/null +++ b/thirdparty/eigen/doc/snippets/tut_matrix_assignment_resizing.cpp @@ -0,0 +1,5 @@ +MatrixXf a(2,2); +std::cout << "a is of size " << a.rows() << "x" << a.cols() << std::endl; +MatrixXf b(3,3); +a = b; +std::cout << "a is now of size " << a.rows() << "x" << a.cols() << std::endl; diff --git a/thirdparty/eigen/doc/special_examples/CMakeLists.txt b/thirdparty/eigen/doc/special_examples/CMakeLists.txt new file mode 100644 index 000000000..101fbc5f9 --- /dev/null +++ b/thirdparty/eigen/doc/special_examples/CMakeLists.txt @@ -0,0 +1,35 @@ +if(NOT EIGEN_TEST_NOQT) + find_package(Qt4) + 
if(QT4_FOUND) + include(${QT_USE_FILE}) + endif() +endif(NOT EIGEN_TEST_NOQT) + +if(QT4_FOUND) + add_executable(Tutorial_sparse_example Tutorial_sparse_example.cpp Tutorial_sparse_example_details.cpp) + target_link_libraries(Tutorial_sparse_example ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${QT_QTCORE_LIBRARY} ${QT_QTGUI_LIBRARY}) + + add_custom_command( + TARGET Tutorial_sparse_example + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/../html/ + COMMAND Tutorial_sparse_example ARGS ${CMAKE_CURRENT_BINARY_DIR}/../html/Tutorial_sparse_example.jpeg + ) + + add_dependencies(all_examples Tutorial_sparse_example) +endif(QT4_FOUND) + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) + add_executable(random_cpp11 random_cpp11.cpp) + target_link_libraries(random_cpp11 ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + add_dependencies(all_examples random_cpp11) + ei_add_target_property(random_cpp11 COMPILE_FLAGS "-std=c++11") + + add_custom_command( + TARGET random_cpp11 + POST_BUILD + COMMAND random_cpp11 + ARGS >${CMAKE_CURRENT_BINARY_DIR}/random_cpp11.out + ) +endif() diff --git a/thirdparty/eigen/doc/special_examples/Tutorial_sparse_example.cpp b/thirdparty/eigen/doc/special_examples/Tutorial_sparse_example.cpp new file mode 100644 index 000000000..830e196ea --- /dev/null +++ b/thirdparty/eigen/doc/special_examples/Tutorial_sparse_example.cpp @@ -0,0 +1,34 @@ +#include +#include + +typedef Eigen::SparseMatrix SpMat; // declares a column-major sparse matrix type of double +typedef Eigen::Triplet T; + +void buildProblem(std::vector& coefficients, Eigen::VectorXd& b, int n); +void saveAsBitmap(const Eigen::VectorXd& x, int n, const char* filename); + +int main(int argc, char** argv) +{ + assert(argc==2); + + int n = 300; // size of the image + int m = n*n; // number of unknows (=number of pixels) + + // Assembly: + std::vector coefficients; // list of non-zeros coefficients + Eigen::VectorXd b(m); 
// the right hand side-vector resulting from the constraints + buildProblem(coefficients, b, n); + + SpMat A(m,m); + A.setFromTriplets(coefficients.begin(), coefficients.end()); + + // Solving: + Eigen::SimplicialCholesky chol(A); // performs a Cholesky factorization of A + Eigen::VectorXd x = chol.solve(b); // use the factorization to solve for the given right hand side + + // Export the result to a file: + saveAsBitmap(x, n, argv[1]); + + return 0; +} + diff --git a/thirdparty/eigen/doc/special_examples/Tutorial_sparse_example_details.cpp b/thirdparty/eigen/doc/special_examples/Tutorial_sparse_example_details.cpp new file mode 100644 index 000000000..bc18b0188 --- /dev/null +++ b/thirdparty/eigen/doc/special_examples/Tutorial_sparse_example_details.cpp @@ -0,0 +1,44 @@ +#include +#include +#include + +typedef Eigen::SparseMatrix SpMat; // declares a column-major sparse matrix type of double +typedef Eigen::Triplet T; + +void insertCoefficient(int id, int i, int j, double w, std::vector& coeffs, + Eigen::VectorXd& b, const Eigen::VectorXd& boundary) +{ + int n = int(boundary.size()); + int id1 = i+j*n; + + if(i==-1 || i==n) b(id) -= w * boundary(j); // constrained coefficient + else if(j==-1 || j==n) b(id) -= w * boundary(i); // constrained coefficient + else coeffs.push_back(T(id,id1,w)); // unknown coefficient +} + +void buildProblem(std::vector& coefficients, Eigen::VectorXd& b, int n) +{ + b.setZero(); + Eigen::ArrayXd boundary = Eigen::ArrayXd::LinSpaced(n, 0,M_PI).sin().pow(2); + for(int j=0; j bits = (x*255).cast(); + QImage img(bits.data(), n,n,QImage::Format_Indexed8); + img.setColorCount(256); + for(int i=0;i<256;i++) img.setColor(i,qRgb(i,i,i)); + img.save(filename); +} diff --git a/thirdparty/eigen/doc/special_examples/random_cpp11.cpp b/thirdparty/eigen/doc/special_examples/random_cpp11.cpp new file mode 100644 index 000000000..33744c051 --- /dev/null +++ b/thirdparty/eigen/doc/special_examples/random_cpp11.cpp @@ -0,0 +1,14 @@ +#include +#include 
+#include + +using namespace Eigen; + +int main() { + std::default_random_engine generator; + std::poisson_distribution distribution(4.1); + auto poisson = [&] () {return distribution(generator);}; + + RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson ); + std::cout << v << "\n"; +} diff --git a/thirdparty/eigen/doc/tutorial.cpp b/thirdparty/eigen/doc/tutorial.cpp new file mode 100644 index 000000000..62be7c270 --- /dev/null +++ b/thirdparty/eigen/doc/tutorial.cpp @@ -0,0 +1,62 @@ +#include + +int main(int argc, char *argv[]) +{ + std::cout.precision(2); + + // demo static functions + Eigen::Matrix3f m3 = Eigen::Matrix3f::Random(); + Eigen::Matrix4f m4 = Eigen::Matrix4f::Identity(); + + std::cout << "*** Step 1 ***\nm3:\n" << m3 << "\nm4:\n" << m4 << std::endl; + + // demo non-static set... functions + m4.setZero(); + m3.diagonal().setOnes(); + + std::cout << "*** Step 2 ***\nm3:\n" << m3 << "\nm4:\n" << m4 << std::endl; + + // demo fixed-size block() expression as lvalue and as rvalue + m4.block<3,3>(0,1) = m3; + m3.row(2) = m4.block<1,3>(2,0); + + std::cout << "*** Step 3 ***\nm3:\n" << m3 << "\nm4:\n" << m4 << std::endl; + + // demo dynamic-size block() + { + int rows = 3, cols = 3; + m4.block(0,1,3,3).setIdentity(); + std::cout << "*** Step 4 ***\nm4:\n" << m4 << std::endl; + } + + // demo vector blocks + m4.diagonal().block(1,2).setOnes(); + std::cout << "*** Step 5 ***\nm4.diagonal():\n" << m4.diagonal() << std::endl; + std::cout << "m4.diagonal().start(3)\n" << m4.diagonal().start(3) << std::endl; + + // demo coeff-wise operations + m4 = m4.cwise()*m4; + m3 = m3.cwise().cos(); + std::cout << "*** Step 6 ***\nm3:\n" << m3 << "\nm4:\n" << m4 << std::endl; + + // sums of coefficients + std::cout << "*** Step 7 ***\n m4.sum(): " << m4.sum() << std::endl; + std::cout << "m4.col(2).sum(): " << m4.col(2).sum() << std::endl; + std::cout << "m4.colwise().sum():\n" << m4.colwise().sum() << std::endl; + std::cout << "m4.rowwise().sum():\n" << m4.rowwise().sum() << 
std::endl; + + // demo intelligent auto-evaluation + m4 = m4 * m4; // auto-evaluates so no aliasing problem (performance penalty is low) + Eigen::Matrix4f other = (m4 * m4).lazy(); // forces lazy evaluation + m4 = m4 + m4; // here Eigen goes for lazy evaluation, as with most expressions + m4 = -m4 + m4 + 5 * m4; // same here, Eigen chooses lazy evaluation for all that. + m4 = m4 * (m4 + m4); // here Eigen chooses to first evaluate m4 + m4 into a temporary. + // indeed, here it is an optimization to cache this intermediate result. + m3 = m3 * m4.block<3,3>(1,1); // here Eigen chooses NOT to evaluate block() into a temporary + // because accessing coefficients of that block expression is not more costly than accessing + // coefficients of a plain matrix. + m4 = m4 * m4.transpose(); // same here, lazy evaluation of the transpose. + m4 = m4 * m4.transpose().eval(); // forces immediate evaluation of the transpose + + std::cout << "*** Step 8 ***\nm3:\n" << m3 << "\nm4:\n" << m4 << std::endl; +} diff --git a/thirdparty/eigen/eigen3.pc.in b/thirdparty/eigen/eigen3.pc.in new file mode 100644 index 000000000..3368a3aa1 --- /dev/null +++ b/thirdparty/eigen/eigen3.pc.in @@ -0,0 +1,9 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} + +Name: Eigen3 +Description: A C++ template library for linear algebra: vectors, matrices, and related algorithms +Requires: +Version: @EIGEN_VERSION_NUMBER@ +Libs: +Cflags: -I${prefix}/@INCLUDE_INSTALL_DIR@ diff --git a/thirdparty/eigen/failtest/CMakeLists.txt b/thirdparty/eigen/failtest/CMakeLists.txt new file mode 100644 index 000000000..1a73f05e6 --- /dev/null +++ b/thirdparty/eigen/failtest/CMakeLists.txt @@ -0,0 +1,75 @@ +message(STATUS "Running the failtests") + +ei_add_failtest("failtest_sanity_check") + +ei_add_failtest("block_nonconst_ctor_on_const_xpr_0") +ei_add_failtest("block_nonconst_ctor_on_const_xpr_1") +ei_add_failtest("block_nonconst_ctor_on_const_xpr_2") +ei_add_failtest("transpose_nonconst_ctor_on_const_xpr") 
+ei_add_failtest("diagonal_nonconst_ctor_on_const_xpr") +ei_add_failtest("cwiseunaryview_nonconst_ctor_on_const_xpr") +ei_add_failtest("triangularview_nonconst_ctor_on_const_xpr") +ei_add_failtest("selfadjointview_nonconst_ctor_on_const_xpr") + +ei_add_failtest("const_qualified_block_method_retval_0") +ei_add_failtest("const_qualified_block_method_retval_1") +ei_add_failtest("const_qualified_transpose_method_retval") +ei_add_failtest("const_qualified_diagonal_method_retval") + +ei_add_failtest("map_nonconst_ctor_on_const_ptr_0") +ei_add_failtest("map_nonconst_ctor_on_const_ptr_1") +ei_add_failtest("map_nonconst_ctor_on_const_ptr_2") +ei_add_failtest("map_nonconst_ctor_on_const_ptr_3") +ei_add_failtest("map_nonconst_ctor_on_const_ptr_4") + +ei_add_failtest("map_on_const_type_actually_const_0") +ei_add_failtest("map_on_const_type_actually_const_1") +ei_add_failtest("block_on_const_type_actually_const_0") +ei_add_failtest("block_on_const_type_actually_const_1") +ei_add_failtest("transpose_on_const_type_actually_const") +ei_add_failtest("diagonal_on_const_type_actually_const") +ei_add_failtest("cwiseunaryview_on_const_type_actually_const") +ei_add_failtest("triangularview_on_const_type_actually_const") +ei_add_failtest("selfadjointview_on_const_type_actually_const") + +ei_add_failtest("ref_1") +ei_add_failtest("ref_2") +ei_add_failtest("ref_3") +ei_add_failtest("ref_4") +ei_add_failtest("ref_5") + +ei_add_failtest("swap_1") +ei_add_failtest("swap_2") + +ei_add_failtest("ternary_1") +ei_add_failtest("ternary_2") + +ei_add_failtest("sparse_ref_1") +ei_add_failtest("sparse_ref_2") +ei_add_failtest("sparse_ref_3") +ei_add_failtest("sparse_ref_4") +ei_add_failtest("sparse_ref_5") + +ei_add_failtest("sparse_storage_mismatch") + +ei_add_failtest("partialpivlu_int") +ei_add_failtest("fullpivlu_int") +ei_add_failtest("llt_int") +ei_add_failtest("ldlt_int") +ei_add_failtest("qr_int") +ei_add_failtest("colpivqr_int") +ei_add_failtest("fullpivqr_int") 
+ei_add_failtest("jacobisvd_int") +ei_add_failtest("bdcsvd_int") +ei_add_failtest("eigensolver_int") +ei_add_failtest("eigensolver_cplx") + +if (EIGEN_FAILTEST_FAILURE_COUNT) + message(FATAL_ERROR + "${EIGEN_FAILTEST_FAILURE_COUNT} out of ${EIGEN_FAILTEST_COUNT} failtests FAILED. " + "To debug these failures, manually compile these programs in ${CMAKE_CURRENT_SOURCE_DIR}, " + "with and without #define EIGEN_SHOULD_FAIL_TO_BUILD.") +else() + message(STATUS "Failtest SUCCESS: all ${EIGEN_FAILTEST_COUNT} failtests passed.") + message(STATUS "") +endif() diff --git a/thirdparty/eigen/failtest/bdcsvd_int.cpp b/thirdparty/eigen/failtest/bdcsvd_int.cpp new file mode 100644 index 000000000..670752cf5 --- /dev/null +++ b/thirdparty/eigen/failtest/bdcsvd_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/SVD" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + BDCSVD > qr(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_0.cpp b/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_0.cpp new file mode 100644 index 000000000..40b82014f --- /dev/null +++ b/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_0.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Block b(m,0,0); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_1.cpp b/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_1.cpp new file mode 100644 index 000000000..ef6d53702 --- /dev/null +++ b/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_1.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + 
Block b(m,0,0,3,3); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_2.cpp b/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_2.cpp new file mode 100644 index 000000000..43f18aecf --- /dev/null +++ b/thirdparty/eigen/failtest/block_nonconst_ctor_on_const_xpr_2.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + // row/column constructor + Block b(m,0); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/block_on_const_type_actually_const_0.cpp b/thirdparty/eigen/failtest/block_on_const_type_actually_const_0.cpp new file mode 100644 index 000000000..009bebece --- /dev/null +++ b/thirdparty/eigen/failtest/block_on_const_type_actually_const_0.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + Matrix3f m; + Block(m, 0, 0, 3, 3).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/block_on_const_type_actually_const_1.cpp b/thirdparty/eigen/failtest/block_on_const_type_actually_const_1.cpp new file mode 100644 index 000000000..4c3e93ffe --- /dev/null +++ b/thirdparty/eigen/failtest/block_on_const_type_actually_const_1.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + Block(m, 0, 0).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/colpivqr_int.cpp b/thirdparty/eigen/failtest/colpivqr_int.cpp new file mode 100644 index 000000000..db11910d4 --- /dev/null +++ b/thirdparty/eigen/failtest/colpivqr_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/QR" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int 
+#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + ColPivHouseholderQR > qr(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/const_qualified_block_method_retval_0.cpp b/thirdparty/eigen/failtest/const_qualified_block_method_retval_0.cpp new file mode 100644 index 000000000..a6bd5fee2 --- /dev/null +++ b/thirdparty/eigen/failtest/const_qualified_block_method_retval_0.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Block b(m.block<3,3>(0,0)); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/const_qualified_block_method_retval_1.cpp b/thirdparty/eigen/failtest/const_qualified_block_method_retval_1.cpp new file mode 100644 index 000000000..ef40c247c --- /dev/null +++ b/thirdparty/eigen/failtest/const_qualified_block_method_retval_1.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Block b(m.block(0,0,3,3)); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/const_qualified_diagonal_method_retval.cpp b/thirdparty/eigen/failtest/const_qualified_diagonal_method_retval.cpp new file mode 100644 index 000000000..809594aab --- /dev/null +++ b/thirdparty/eigen/failtest/const_qualified_diagonal_method_retval.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Diagonal b(m.diagonal()); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/const_qualified_transpose_method_retval.cpp b/thirdparty/eigen/failtest/const_qualified_transpose_method_retval.cpp new file mode 100644 index 000000000..2d7f19cab --- /dev/null +++ 
b/thirdparty/eigen/failtest/const_qualified_transpose_method_retval.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Transpose b(m.transpose()); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp b/thirdparty/eigen/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 000000000..e23cf8fd8 --- /dev/null +++ b/thirdparty/eigen/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + CwiseUnaryView,Matrix3d> t(m); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/cwiseunaryview_on_const_type_actually_const.cpp b/thirdparty/eigen/failtest/cwiseunaryview_on_const_type_actually_const.cpp new file mode 100644 index 000000000..fcd41dfdb --- /dev/null +++ b/thirdparty/eigen/failtest/cwiseunaryview_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + CwiseUnaryView,CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/diagonal_nonconst_ctor_on_const_xpr.cpp b/thirdparty/eigen/failtest/diagonal_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 000000000..76398a2c2 --- /dev/null +++ b/thirdparty/eigen/failtest/diagonal_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Diagonal d(m); +} + 
+int main() {} diff --git a/thirdparty/eigen/failtest/diagonal_on_const_type_actually_const.cpp b/thirdparty/eigen/failtest/diagonal_on_const_type_actually_const.cpp new file mode 100644 index 000000000..d4b2fd9b8 --- /dev/null +++ b/thirdparty/eigen/failtest/diagonal_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + Diagonal(m).coeffRef(0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/eigensolver_cplx.cpp b/thirdparty/eigen/failtest/eigensolver_cplx.cpp new file mode 100644 index 000000000..c2e21e189 --- /dev/null +++ b/thirdparty/eigen/failtest/eigensolver_cplx.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Eigenvalues" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR std::complex +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + EigenSolver > eig(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/eigensolver_int.cpp b/thirdparty/eigen/failtest/eigensolver_int.cpp new file mode 100644 index 000000000..eda8dc20b --- /dev/null +++ b/thirdparty/eigen/failtest/eigensolver_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Eigenvalues" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + EigenSolver > eig(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/failtest_sanity_check.cpp b/thirdparty/eigen/failtest/failtest_sanity_check.cpp new file mode 100644 index 000000000..769fa942d --- /dev/null +++ b/thirdparty/eigen/failtest/failtest_sanity_check.cpp @@ -0,0 +1,5 @@ +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +This is just some text that won't compile as a C++ file, as a basic sanity check for failtest. 
+#else +int main() {} +#endif diff --git a/thirdparty/eigen/failtest/fullpivlu_int.cpp b/thirdparty/eigen/failtest/fullpivlu_int.cpp new file mode 100644 index 000000000..e9d2c6eb3 --- /dev/null +++ b/thirdparty/eigen/failtest/fullpivlu_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/LU" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + FullPivLU > lu(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/fullpivqr_int.cpp b/thirdparty/eigen/failtest/fullpivqr_int.cpp new file mode 100644 index 000000000..d182a7b6b --- /dev/null +++ b/thirdparty/eigen/failtest/fullpivqr_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/QR" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + FullPivHouseholderQR > qr(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/jacobisvd_int.cpp b/thirdparty/eigen/failtest/jacobisvd_int.cpp new file mode 100644 index 000000000..12790aef1 --- /dev/null +++ b/thirdparty/eigen/failtest/jacobisvd_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/SVD" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + JacobiSVD > qr(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/ldlt_int.cpp b/thirdparty/eigen/failtest/ldlt_int.cpp new file mode 100644 index 000000000..243e45746 --- /dev/null +++ b/thirdparty/eigen/failtest/ldlt_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Cholesky" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + LDLT > ldlt(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/llt_int.cpp b/thirdparty/eigen/failtest/llt_int.cpp new file mode 100644 index 000000000..cb020650d --- /dev/null +++ b/thirdparty/eigen/failtest/llt_int.cpp @@ -0,0 +1,14 @@ 
+#include "../Eigen/Cholesky" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + LLT > llt(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_0.cpp b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_0.cpp new file mode 100644 index 000000000..d75686f58 --- /dev/null +++ b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_0.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER float *ptr){ + Map m(ptr); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_1.cpp b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_1.cpp new file mode 100644 index 000000000..eda134dc8 --- /dev/null +++ b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_1.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER float *ptr, DenseIndex size){ + Map m(ptr, size); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_2.cpp b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_2.cpp new file mode 100644 index 000000000..06b4b6275 --- /dev/null +++ b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_2.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER float *ptr, DenseIndex rows, DenseIndex cols){ + Map m(ptr, rows, cols); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_3.cpp b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_3.cpp new file mode 100644 index 
000000000..830f6f0c9 --- /dev/null +++ b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_3.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER float *ptr, DenseIndex rows, DenseIndex cols){ + Map > m(ptr, rows, cols, InnerStride<2>()); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_4.cpp b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_4.cpp new file mode 100644 index 000000000..c3e8c952c --- /dev/null +++ b/thirdparty/eigen/failtest/map_nonconst_ctor_on_const_ptr_4.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER +#else +#define CV_QUALIFIER const +#endif + +using namespace Eigen; + +void foo(const float *ptr, DenseIndex rows, DenseIndex cols){ + Map > m(ptr, rows, cols, OuterStride<>(2)); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/map_on_const_type_actually_const_0.cpp b/thirdparty/eigen/failtest/map_on_const_type_actually_const_0.cpp new file mode 100644 index 000000000..8cb6aa0cd --- /dev/null +++ b/thirdparty/eigen/failtest/map_on_const_type_actually_const_0.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(float *ptr){ + Map(ptr, 1, 1).coeffRef(0,0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/map_on_const_type_actually_const_1.cpp b/thirdparty/eigen/failtest/map_on_const_type_actually_const_1.cpp new file mode 100644 index 000000000..04e067c34 --- /dev/null +++ b/thirdparty/eigen/failtest/map_on_const_type_actually_const_1.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(float *ptr){ + 
Map(ptr).coeffRef(0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/partialpivlu_int.cpp b/thirdparty/eigen/failtest/partialpivlu_int.cpp new file mode 100644 index 000000000..98ef282ea --- /dev/null +++ b/thirdparty/eigen/failtest/partialpivlu_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/LU" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + PartialPivLU > lu(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/qr_int.cpp b/thirdparty/eigen/failtest/qr_int.cpp new file mode 100644 index 000000000..ce200e818 --- /dev/null +++ b/thirdparty/eigen/failtest/qr_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/QR" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + HouseholderQR > qr(Matrix::Random(10,10)); +} diff --git a/thirdparty/eigen/failtest/ref_1.cpp b/thirdparty/eigen/failtest/ref_1.cpp new file mode 100644 index 000000000..8b798d53d --- /dev/null +++ b/thirdparty/eigen/failtest/ref_1.cpp @@ -0,0 +1,18 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void call_ref(Ref a) { } + +int main() +{ + VectorXf a(10); + CV_QUALIFIER VectorXf& ac(a); + call_ref(ac); +} diff --git a/thirdparty/eigen/failtest/ref_2.cpp b/thirdparty/eigen/failtest/ref_2.cpp new file mode 100644 index 000000000..0b779ccf5 --- /dev/null +++ b/thirdparty/eigen/failtest/ref_2.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +void call_ref(Ref a) { } + +int main() +{ + MatrixXf A(10,10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(A.row(3)); +#else + call_ref(A.col(3)); +#endif +} diff --git a/thirdparty/eigen/failtest/ref_3.cpp b/thirdparty/eigen/failtest/ref_3.cpp new file mode 100644 index 000000000..f46027d48 --- /dev/null +++ 
b/thirdparty/eigen/failtest/ref_3.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +void call_ref(Ref a) { } +#else +void call_ref(const Ref &a) { } +#endif + +int main() +{ + VectorXf a(10); + call_ref(a+a); +} diff --git a/thirdparty/eigen/failtest/ref_4.cpp b/thirdparty/eigen/failtest/ref_4.cpp new file mode 100644 index 000000000..6c11fa4cb --- /dev/null +++ b/thirdparty/eigen/failtest/ref_4.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +void call_ref(Ref > a) {} + +int main() +{ + MatrixXf A(10,10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(A.transpose()); +#else + call_ref(A); +#endif +} diff --git a/thirdparty/eigen/failtest/ref_5.cpp b/thirdparty/eigen/failtest/ref_5.cpp new file mode 100644 index 000000000..846d52795 --- /dev/null +++ b/thirdparty/eigen/failtest/ref_5.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +void call_ref(Ref a) { } + +int main() +{ + VectorXf a(10); + DenseBase &ac(a); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(ac); +#else + call_ref(ac.derived()); +#endif +} diff --git a/thirdparty/eigen/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp b/thirdparty/eigen/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 000000000..a240f8184 --- /dev/null +++ b/thirdparty/eigen/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + SelfAdjointView t(m); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/selfadjointview_on_const_type_actually_const.cpp b/thirdparty/eigen/failtest/selfadjointview_on_const_type_actually_const.cpp new file mode 100644 index 000000000..19aaad6d0 --- /dev/null +++ b/thirdparty/eigen/failtest/selfadjointview_on_const_type_actually_const.cpp @@ 
-0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + SelfAdjointView(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/failtest/sparse_ref_1.cpp b/thirdparty/eigen/failtest/sparse_ref_1.cpp new file mode 100644 index 000000000..d78d1f9b1 --- /dev/null +++ b/thirdparty/eigen/failtest/sparse_ref_1.cpp @@ -0,0 +1,18 @@ +#include "../Eigen/Sparse" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void call_ref(Ref > a) { } + +int main() +{ + SparseMatrix a(10,10); + CV_QUALIFIER SparseMatrix& ac(a); + call_ref(ac); +} diff --git a/thirdparty/eigen/failtest/sparse_ref_2.cpp b/thirdparty/eigen/failtest/sparse_ref_2.cpp new file mode 100644 index 000000000..46c9440c2 --- /dev/null +++ b/thirdparty/eigen/failtest/sparse_ref_2.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +void call_ref(Ref > a) { } + +int main() +{ + SparseMatrix A(10,10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(A.row(3)); +#else + call_ref(A.col(3)); +#endif +} diff --git a/thirdparty/eigen/failtest/sparse_ref_3.cpp b/thirdparty/eigen/failtest/sparse_ref_3.cpp new file mode 100644 index 000000000..a9949b552 --- /dev/null +++ b/thirdparty/eigen/failtest/sparse_ref_3.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +void call_ref(Ref > a) { } +#else +void call_ref(const Ref > &a) { } +#endif + +int main() +{ + SparseMatrix a(10,10); + call_ref(a+a); +} diff --git a/thirdparty/eigen/failtest/sparse_ref_4.cpp b/thirdparty/eigen/failtest/sparse_ref_4.cpp new file mode 100644 index 000000000..57bb6a1fc --- /dev/null +++ b/thirdparty/eigen/failtest/sparse_ref_4.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +void call_ref(Ref > a) {} + 
+int main() +{ + SparseMatrix A(10,10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(A.transpose()); +#else + call_ref(A); +#endif +} diff --git a/thirdparty/eigen/failtest/sparse_ref_5.cpp b/thirdparty/eigen/failtest/sparse_ref_5.cpp new file mode 100644 index 000000000..4478f6f2f --- /dev/null +++ b/thirdparty/eigen/failtest/sparse_ref_5.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +void call_ref(Ref > a) { } + +int main() +{ + SparseMatrix a(10,10); + SparseMatrixBase > &ac(a); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(ac); +#else + call_ref(ac.derived()); +#endif +} diff --git a/thirdparty/eigen/failtest/sparse_storage_mismatch.cpp b/thirdparty/eigen/failtest/sparse_storage_mismatch.cpp new file mode 100644 index 000000000..51840d416 --- /dev/null +++ b/thirdparty/eigen/failtest/sparse_storage_mismatch.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Sparse" +using namespace Eigen; + +typedef SparseMatrix Mat1; +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +typedef SparseMatrix Mat2; +#else +typedef SparseMatrix Mat2; +#endif + +int main() +{ + Mat1 a(10,10); + Mat2 b(10,10); + a += b; +} diff --git a/thirdparty/eigen/failtest/swap_1.cpp b/thirdparty/eigen/failtest/swap_1.cpp new file mode 100644 index 000000000..106379720 --- /dev/null +++ b/thirdparty/eigen/failtest/swap_1.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main() +{ + VectorXf a(10), b(10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + const DenseBase &ac(a); +#else + DenseBase &ac(a); +#endif + b.swap(ac); +} diff --git a/thirdparty/eigen/failtest/swap_2.cpp b/thirdparty/eigen/failtest/swap_2.cpp new file mode 100644 index 000000000..c130ba6e4 --- /dev/null +++ b/thirdparty/eigen/failtest/swap_2.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main() +{ + VectorXf a(10), b(10); + VectorXf const &ac(a); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + b.swap(ac); +#else + b.swap(ac.const_cast_derived()); +#endif +} \ No newline at end 
of file diff --git a/thirdparty/eigen/failtest/ternary_1.cpp b/thirdparty/eigen/failtest/ternary_1.cpp new file mode 100644 index 000000000..b40bcb0cc --- /dev/null +++ b/thirdparty/eigen/failtest/ternary_1.cpp @@ -0,0 +1,13 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main(int argc,char **) +{ + VectorXf a(10), b(10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + b = argc>1 ? 2*a : -a; +#else + b = argc>1 ? 2*a : VectorXf(-a); +#endif +} diff --git a/thirdparty/eigen/failtest/ternary_2.cpp b/thirdparty/eigen/failtest/ternary_2.cpp new file mode 100644 index 000000000..a46b12b2b --- /dev/null +++ b/thirdparty/eigen/failtest/ternary_2.cpp @@ -0,0 +1,13 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main(int argc,char **) +{ + VectorXf a(10), b(10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + b = argc>1 ? 2*a : a+a; +#else + b = argc>1 ? VectorXf(2*a) : VectorXf(a+a); +#endif +} diff --git a/thirdparty/eigen/failtest/transpose_nonconst_ctor_on_const_xpr.cpp b/thirdparty/eigen/failtest/transpose_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 000000000..4223e7fd7 --- /dev/null +++ b/thirdparty/eigen/failtest/transpose_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + Transpose t(m); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/transpose_on_const_type_actually_const.cpp b/thirdparty/eigen/failtest/transpose_on_const_type_actually_const.cpp new file mode 100644 index 000000000..d0b7d0df6 --- /dev/null +++ b/thirdparty/eigen/failtest/transpose_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + Transpose(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git 
a/thirdparty/eigen/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp b/thirdparty/eigen/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 000000000..807447e4b --- /dev/null +++ b/thirdparty/eigen/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + TriangularView t(m); +} + +int main() {} diff --git a/thirdparty/eigen/failtest/triangularview_on_const_type_actually_const.cpp b/thirdparty/eigen/failtest/triangularview_on_const_type_actually_const.cpp new file mode 100644 index 000000000..0a381a612 --- /dev/null +++ b/thirdparty/eigen/failtest/triangularview_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + TriangularView(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/thirdparty/eigen/lapack/CMakeLists.txt b/thirdparty/eigen/lapack/CMakeLists.txt new file mode 100644 index 000000000..9883d4c72 --- /dev/null +++ b/thirdparty/eigen/lapack/CMakeLists.txt @@ -0,0 +1,449 @@ + +project(EigenLapack CXX) + +include("../cmake/language_support.cmake") + +workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS) + +if(EIGEN_Fortran_COMPILER_WORKS) + enable_language(Fortran OPTIONAL) + if(NOT CMAKE_Fortran_COMPILER) + set(EIGEN_Fortran_COMPILER_WORKS OFF) + endif() +endif() + +add_custom_target(lapack) +include_directories(../blas) + +set(EigenLapack_SRCS +single.cpp double.cpp complex_single.cpp complex_double.cpp ../blas/xerbla.cpp +) + +if(EIGEN_Fortran_COMPILER_WORKS) + +set(EigenLapack_SRCS ${EigenLapack_SRCS} + slarft.f dlarft.f clarft.f zlarft.f + slarfb.f dlarfb.f clarfb.f zlarfb.f + slarfg.f dlarfg.f clarfg.f zlarfg.f + slarf.f dlarf.f 
clarf.f zlarf.f + sladiv.f dladiv.f cladiv.f zladiv.f + ilaslr.f iladlr.f ilaclr.f ilazlr.f + ilaslc.f iladlc.f ilaclc.f ilazlc.f + dlapy2.f dlapy3.f slapy2.f slapy3.f + clacgv.f zlacgv.f + slamch.f dlamch.f + second_NONE.f dsecnd_NONE.f +) + +option(EIGEN_ENABLE_LAPACK_TESTS OFF "Enbale the Lapack unit tests") + +if(EIGEN_ENABLE_LAPACK_TESTS) + + get_filename_component(eigen_full_path_to_reference_lapack "./reference/" ABSOLUTE) + if(NOT EXISTS ${eigen_full_path_to_reference_lapack}) + # Download lapack and install sources and testing at the right place + message(STATUS "Download lapack_addons_3.4.1.tgz...") + + file(DOWNLOAD "http://downloads.tuxfamily.org/eigen/lapack_addons_3.4.1.tgz" + "${CMAKE_CURRENT_SOURCE_DIR}/lapack_addons_3.4.1.tgz" + INACTIVITY_TIMEOUT 15 + TIMEOUT 240 + STATUS download_status + EXPECTED_MD5 5758ce55afcf79da98de8b9de1615ad5 + SHOW_PROGRESS) + + message(STATUS ${download_status}) + list(GET download_status 0 download_status_num) + set(download_status_num 0) + if(download_status_num EQUAL 0) + message(STATUS "Setup lapack reference and lapack unit tests") + execute_process(COMMAND tar xzf "lapack_addons_3.4.1.tgz" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + else() + message(STATUS "Download of lapack_addons_3.4.1.tgz failed, LAPACK unit tests wont be enabled") + set(EIGEN_ENABLE_LAPACK_TESTS false) + endif() + + endif() + + get_filename_component(eigen_full_path_to_reference_lapack "./reference/" ABSOLUTE) + if(EXISTS ${eigen_full_path_to_reference_lapack}) + set(EigenLapack_funcfilenames + ssyev.f dsyev.f csyev.f zsyev.f + spotrf.f dpotrf.f cpotrf.f zpotrf.f + spotrs.f dpotrs.f cpotrs.f zpotrs.f + sgetrf.f dgetrf.f cgetrf.f zgetrf.f + sgetrs.f dgetrs.f cgetrs.f zgetrs.f) + + FILE(GLOB ReferenceLapack_SRCS0 RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "reference/*.f") + foreach(filename1 IN LISTS ReferenceLapack_SRCS0) + string(REPLACE "reference/" "" filename ${filename1}) + list(FIND EigenLapack_SRCS ${filename} id1) + list(FIND 
EigenLapack_funcfilenames ${filename} id2) + if((id1 EQUAL -1) AND (id2 EQUAL -1)) + set(ReferenceLapack_SRCS ${ReferenceLapack_SRCS} reference/${filename}) + endif() + endforeach() + endif() + + +endif(EIGEN_ENABLE_LAPACK_TESTS) + +endif(EIGEN_Fortran_COMPILER_WORKS) + +add_library(eigen_lapack_static ${EigenLapack_SRCS} ${ReferenceLapack_SRCS}) +add_library(eigen_lapack SHARED ${EigenLapack_SRCS}) + +target_link_libraries(eigen_lapack eigen_blas) + +if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(eigen_lapack_static ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + target_link_libraries(eigen_lapack ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) +endif() + +add_dependencies(lapack eigen_lapack eigen_lapack_static) + +install(TARGETS eigen_lapack eigen_lapack_static + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib) + + + +get_filename_component(eigen_full_path_to_testing_lapack "./testing/" ABSOLUTE) +if(EXISTS ${eigen_full_path_to_testing_lapack}) + + # The following comes from lapack/TESTING/CMakeLists.txt + # Get Python + find_package(PythonInterp) + message(STATUS "Looking for Python found - ${PYTHONINTERP_FOUND}") + if (PYTHONINTERP_FOUND) + message(STATUS "Using Python version ${PYTHON_VERSION_STRING}") + endif() + + set(LAPACK_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + set(LAPACK_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + set(BUILD_SINGLE true) + set(BUILD_DOUBLE true) + set(BUILD_COMPLEX true) + set(BUILD_COMPLEX16E true) + + if(MSVC_VERSION) +# string(REPLACE "/STACK:10000000" "/STACK:900000000000000000" +# CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + string(REGEX REPLACE "(.*)/STACK:(.*) (.*)" "\\1/STACK:900000000000000000 \\3" + CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + endif() + add_subdirectory(testing/MATGEN) + add_subdirectory(testing/LIN) + add_subdirectory(testing/EIG) + macro(add_lapack_test output input target) + set(TEST_INPUT "${LAPACK_SOURCE_DIR}/testing/${input}") + set(TEST_OUTPUT 
"${LAPACK_BINARY_DIR}/testing/${output}") + get_target_property(TEST_LOC ${target} LOCATION) + string(REPLACE "." "_" input_name ${input}) + set(testName "${target}_${input_name}") + if(EXISTS "${TEST_INPUT}") + add_test(LAPACK-${testName} "${CMAKE_COMMAND}" + -DTEST=${TEST_LOC} + -DINPUT=${TEST_INPUT} + -DOUTPUT=${TEST_OUTPUT} + -DINTDIR=${CMAKE_CFG_INTDIR} + -P "${LAPACK_SOURCE_DIR}/testing/runtest.cmake") + endif() + endmacro(add_lapack_test) + + if (BUILD_SINGLE) + add_lapack_test(stest.out stest.in xlintsts) + # + # ======== SINGLE RFP LIN TESTS ======================== + add_lapack_test(stest_rfp.out stest_rfp.in xlintstrfs) + # + # + # ======== SINGLE EIG TESTS =========================== + # + + add_lapack_test(snep.out nep.in xeigtsts) + + + add_lapack_test(ssep.out sep.in xeigtsts) + + + add_lapack_test(ssvd.out svd.in xeigtsts) + + + add_lapack_test(sec.out sec.in xeigtsts) + + + add_lapack_test(sed.out sed.in xeigtsts) + + + add_lapack_test(sgg.out sgg.in xeigtsts) + + + add_lapack_test(sgd.out sgd.in xeigtsts) + + + add_lapack_test(ssb.out ssb.in xeigtsts) + + + add_lapack_test(ssg.out ssg.in xeigtsts) + + + add_lapack_test(sbal.out sbal.in xeigtsts) + + + add_lapack_test(sbak.out sbak.in xeigtsts) + + + add_lapack_test(sgbal.out sgbal.in xeigtsts) + + + add_lapack_test(sgbak.out sgbak.in xeigtsts) + + + add_lapack_test(sbb.out sbb.in xeigtsts) + + + add_lapack_test(sglm.out glm.in xeigtsts) + + + add_lapack_test(sgqr.out gqr.in xeigtsts) + + + add_lapack_test(sgsv.out gsv.in xeigtsts) + + + add_lapack_test(scsd.out csd.in xeigtsts) + + + add_lapack_test(slse.out lse.in xeigtsts) + endif() + + if (BUILD_DOUBLE) + # + # ======== DOUBLE LIN TESTS =========================== + add_lapack_test(dtest.out dtest.in xlintstd) + # + # ======== DOUBLE RFP LIN TESTS ======================== + add_lapack_test(dtest_rfp.out dtest_rfp.in xlintstrfd) + # + # ======== DOUBLE EIG TESTS =========================== + + add_lapack_test(dnep.out nep.in xeigtstd) + + + 
add_lapack_test(dsep.out sep.in xeigtstd) + + + add_lapack_test(dsvd.out svd.in xeigtstd) + + + add_lapack_test(dec.out dec.in xeigtstd) + + + add_lapack_test(ded.out ded.in xeigtstd) + + + add_lapack_test(dgg.out dgg.in xeigtstd) + + + add_lapack_test(dgd.out dgd.in xeigtstd) + + + add_lapack_test(dsb.out dsb.in xeigtstd) + + + add_lapack_test(dsg.out dsg.in xeigtstd) + + + add_lapack_test(dbal.out dbal.in xeigtstd) + + + add_lapack_test(dbak.out dbak.in xeigtstd) + + + add_lapack_test(dgbal.out dgbal.in xeigtstd) + + + add_lapack_test(dgbak.out dgbak.in xeigtstd) + + + add_lapack_test(dbb.out dbb.in xeigtstd) + + + add_lapack_test(dglm.out glm.in xeigtstd) + + + add_lapack_test(dgqr.out gqr.in xeigtstd) + + + add_lapack_test(dgsv.out gsv.in xeigtstd) + + + add_lapack_test(dcsd.out csd.in xeigtstd) + + + add_lapack_test(dlse.out lse.in xeigtstd) + endif() + + if (BUILD_COMPLEX) + add_lapack_test(ctest.out ctest.in xlintstc) + # + # ======== COMPLEX RFP LIN TESTS ======================== + add_lapack_test(ctest_rfp.out ctest_rfp.in xlintstrfc) + # + # ======== COMPLEX EIG TESTS =========================== + + add_lapack_test(cnep.out nep.in xeigtstc) + + + add_lapack_test(csep.out sep.in xeigtstc) + + + add_lapack_test(csvd.out svd.in xeigtstc) + + + add_lapack_test(cec.out cec.in xeigtstc) + + + add_lapack_test(ced.out ced.in xeigtstc) + + + add_lapack_test(cgg.out cgg.in xeigtstc) + + + add_lapack_test(cgd.out cgd.in xeigtstc) + + + add_lapack_test(csb.out csb.in xeigtstc) + + + add_lapack_test(csg.out csg.in xeigtstc) + + + add_lapack_test(cbal.out cbal.in xeigtstc) + + + add_lapack_test(cbak.out cbak.in xeigtstc) + + + add_lapack_test(cgbal.out cgbal.in xeigtstc) + + + add_lapack_test(cgbak.out cgbak.in xeigtstc) + + + add_lapack_test(cbb.out cbb.in xeigtstc) + + + add_lapack_test(cglm.out glm.in xeigtstc) + + + add_lapack_test(cgqr.out gqr.in xeigtstc) + + + add_lapack_test(cgsv.out gsv.in xeigtstc) + + + add_lapack_test(ccsd.out csd.in xeigtstc) + + + 
add_lapack_test(clse.out lse.in xeigtstc) + endif() + + if (BUILD_COMPLEX16) + # + # ======== COMPLEX16 LIN TESTS ======================== + add_lapack_test(ztest.out ztest.in xlintstz) + # + # ======== COMPLEX16 RFP LIN TESTS ======================== + add_lapack_test(ztest_rfp.out ztest_rfp.in xlintstrfz) + # + # ======== COMPLEX16 EIG TESTS =========================== + + add_lapack_test(znep.out nep.in xeigtstz) + + + add_lapack_test(zsep.out sep.in xeigtstz) + + + add_lapack_test(zsvd.out svd.in xeigtstz) + + + add_lapack_test(zec.out zec.in xeigtstz) + + + add_lapack_test(zed.out zed.in xeigtstz) + + + add_lapack_test(zgg.out zgg.in xeigtstz) + + + add_lapack_test(zgd.out zgd.in xeigtstz) + + + add_lapack_test(zsb.out zsb.in xeigtstz) + + + add_lapack_test(zsg.out zsg.in xeigtstz) + + + add_lapack_test(zbal.out zbal.in xeigtstz) + + + add_lapack_test(zbak.out zbak.in xeigtstz) + + + add_lapack_test(zgbal.out zgbal.in xeigtstz) + + + add_lapack_test(zgbak.out zgbak.in xeigtstz) + + + add_lapack_test(zbb.out zbb.in xeigtstz) + + + add_lapack_test(zglm.out glm.in xeigtstz) + + + add_lapack_test(zgqr.out gqr.in xeigtstz) + + + add_lapack_test(zgsv.out gsv.in xeigtstz) + + + add_lapack_test(zcsd.out csd.in xeigtstz) + + + add_lapack_test(zlse.out lse.in xeigtstz) + endif() + + + if (BUILD_SIMPLE) + if (BUILD_DOUBLE) + # + # ======== SINGLE-DOUBLE PROTO LIN TESTS ============== + add_lapack_test(dstest.out dstest.in xlintstds) + endif() + endif() + + + if (BUILD_COMPLEX) + if (BUILD_COMPLEX16) + # + # ======== COMPLEX-COMPLEX16 LIN TESTS ======================== + add_lapack_test(zctest.out zctest.in xlintstzc) + endif() + endif() + + # ============================================================================== + + execute_process(COMMAND ${CMAKE_COMMAND} -E copy ${LAPACK_SOURCE_DIR}/testing/lapack_testing.py ${LAPACK_BINARY_DIR}) + add_test( + NAME LAPACK_Test_Summary + WORKING_DIRECTORY ${LAPACK_BINARY_DIR} + COMMAND ${PYTHON_EXECUTABLE} "lapack_testing.py" + 
) + +endif() + diff --git a/thirdparty/eigen/lapack/cholesky.cpp b/thirdparty/eigen/lapack/cholesky.cpp new file mode 100644 index 000000000..ea3bc123b --- /dev/null +++ b/thirdparty/eigen/lapack/cholesky.cpp @@ -0,0 +1,72 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "lapack_common.h" +#include + +// POTRF computes the Cholesky factorization of a real symmetric positive definite matrix A. +EIGEN_LAPACK_FUNC(potrf,(char* uplo, int *n, RealScalar *pa, int *lda, int *info)) +{ + *info = 0; + if(UPLO(*uplo)==INVALID) *info = -1; + else if(*n<0) *info = -2; + else if(*lda(pa); + MatrixType A(a,*n,*n,*lda); + int ret; + if(UPLO(*uplo)==UP) ret = int(internal::llt_inplace::blocked(A)); + else ret = int(internal::llt_inplace::blocked(A)); + + if(ret>=0) + *info = ret+1; + + return 0; +} + +// POTRS solves a system of linear equations A*X = B with a symmetric +// positive definite matrix A using the Cholesky factorization +// A = U**T*U or A = L*L**T computed by DPOTRF. 
+EIGEN_LAPACK_FUNC(potrs,(char* uplo, int *n, int *nrhs, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, int *info)) +{ + *info = 0; + if(UPLO(*uplo)==INVALID) *info = -1; + else if(*n<0) *info = -2; + else if(*nrhs<0) *info = -3; + else if(*lda(pa); + Scalar* b = reinterpret_cast(pb); + MatrixType A(a,*n,*n,*lda); + MatrixType B(b,*n,*nrhs,*ldb); + + if(UPLO(*uplo)==UP) + { + A.triangularView().adjoint().solveInPlace(B); + A.triangularView().solveInPlace(B); + } + else + { + A.triangularView().solveInPlace(B); + A.triangularView().adjoint().solveInPlace(B); + } + + return 0; +} diff --git a/thirdparty/eigen/lapack/clacgv.f b/thirdparty/eigen/lapack/clacgv.f new file mode 100644 index 000000000..359eb07f3 --- /dev/null +++ b/thirdparty/eigen/lapack/clacgv.f @@ -0,0 +1,116 @@ +*> \brief \b CLACGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACGV( N, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACGV conjugates a complex vector of length N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vector X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension +*> (1+(N-1)*abs(INCX)) +*> On entry, the vector of length N to be conjugated. +*> On exit, X is overwritten with conjg(X). +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive elements of X. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. 
of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = CONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = CONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of CLACGV +* + END diff --git a/thirdparty/eigen/lapack/cladiv.f b/thirdparty/eigen/lapack/cladiv.f new file mode 100644 index 000000000..2807ac5fc --- /dev/null +++ b/thirdparty/eigen/lapack/cladiv.f @@ -0,0 +1,97 @@ +*> \brief \b CLADIV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* COMPLEX FUNCTION CLADIV( X, Y ) +* +* .. Scalar Arguments .. +* COMPLEX X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLADIV := X / Y, where X and Y are complex. The computation of X / Y +*> will not overflow on an intermediary step unless the results +*> overflows. 
+*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is COMPLEX +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX +*> The complex scalars X and Y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + COMPLEX FUNCTION CLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX X, Y +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* + CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR, + $ ZI ) + CLADIV = CMPLX( ZR, ZI ) +* + RETURN +* +* End of CLADIV +* + END diff --git a/thirdparty/eigen/lapack/clarf.f b/thirdparty/eigen/lapack/clarf.f new file mode 100644 index 000000000..ca0328fb5 --- /dev/null +++ b/thirdparty/eigen/lapack/clarf.f @@ -0,0 +1,232 @@ +*> \brief \b CLARF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. 
+* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARF applies a complex elementary reflector H to a complex M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. 
+* +*> \date November 2011 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILACLR, ILACLC + EXTERNAL LSAME, ILACLR, ILACLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILACLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILACLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. 
+ IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) +* + CALL CGEMV( 'Conjugate transpose', LASTV, LASTC, ONE, + $ C, LDC, V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**H +* + CALL CGERC( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL CGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**H +* + CALL CGERC( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of CLARF +* + END diff --git a/thirdparty/eigen/lapack/clarfb.f b/thirdparty/eigen/lapack/clarfb.f new file mode 100644 index 000000000..40bbdf487 --- /dev/null +++ b/thirdparty/eigen/lapack/clarfb.f @@ -0,0 +1,771 @@ +*> \brief \b CLARFB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFB applies a complex block reflector H or its transpose H**H to a +*> complex M-by-N matrix C, from either the left or the right. 
+*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. 
+*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. 
+*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J, LASTV, LASTC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILACLR, ILACLC + EXTERNAL LSAME, ILACLR, ILACLC +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILACLR( M, K, V, LDV ) ) + LASTC = ILACLC( LASTV, N, C, LDC ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C1**H +* + DO 10 J = 1, K + CALL CCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( LASTC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**H *V2 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ LASTV-K, LASTC, K, -ONE, V( K+1, 1 ), LDV, + $ WORK, LDWORK, ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 30 J = 1, K + DO 20 I = 1, LASTC + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILACLR( N, K, V, LDV ) ) + LASTC = ILACLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL CCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H 
+* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILACLR( M, K, V, LDV ) ) + LASTC = ILACLC( LASTV, N, C, LDC ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C2**H +* + DO 70 J = 1, K + CALL CCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + CALL CLACGV( LASTC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**H*V1 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1 * W**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 90 J = 1, K + DO 80 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - 
+ $ CONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILACLR( N, K, V, LDV ) ) + LASTC = ILACLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL CCOPY( LASTC, C( 1, LASTV-K+J ), 1, + $ WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) + $ - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILACLC( K, M, V, LDV ) ) + LASTC = ILACLC( LASTV, N, C, LDC ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C1**H +* + DO 130 J = 1, K + CALL CCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( LASTC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**H*V2**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2**H * W**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTV-K, LASTC, K, + $ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 150 J = 1, K + DO 140 I = 1, LASTC + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILACLC( K, N, V, LDV ) ) + LASTC = ILACLR( M, LASTV, C, LDC ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL CCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, K, LASTV-K, ONE, C( 1, K+1 ), LDC, + $ 
V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILACLC( K, M, V, LDV ) ) + LASTC = ILACLC( LASTV, N, C, LDC ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C2**H +* + DO 190 J = 1, K + CALL CCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + CALL CLACGV( LASTC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**H * V1**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTC, K, LASTV-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1**H * W**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTV-K, LASTC, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 
:= C2 - W**H +* + DO 210 J = 1, K + DO 200 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILACLC( K, N, V, LDV ) ) + LASTC = ILACLR( M, LASTV, C, LDC ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL CCOPY( LASTC, C( 1, LASTV-K+J ), 1, + $ WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 240 J = 1, K + DO 230 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) + $ - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of CLARFB +* + END diff --git a/thirdparty/eigen/lapack/clarfg.f b/thirdparty/eigen/lapack/clarfg.f new file mode 100644 index 000000000..d64f396c3 --- /dev/null +++ b/thirdparty/eigen/lapack/clarfg.f @@ -0,0 +1,203 @@ +*> \brief \b CLARFG +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +*
Definition: +* =========== +* +* SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFG generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, with beta real, and x is an +*> (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. +*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> +*> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. 
+* +*> \date November 2011 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SCNRM2, SLAMCH, SLAPY3 + COMPLEX CLADIV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, CLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHR = REAL( ALPHA ) + ALPHI = AIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. 
ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL CSSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHA = CMPLX( ALPHR, ALPHI ) + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) + CALL CSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of CLARFG +* + END diff --git a/thirdparty/eigen/lapack/clarft.f b/thirdparty/eigen/lapack/clarft.f new file mode 100644 index 000000000..981447f77 --- /dev/null +++ b/thirdparty/eigen/lapack/clarft.f @@ -0,0 +1,328 @@ +*> \brief \b CLARFT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . 
H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. 
+*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. 
+ EXTERNAL CGEMV, CLACGV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL CGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, + $ ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. 
+ DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of CLARFT +* + END diff --git a/thirdparty/eigen/lapack/complex_double.cpp b/thirdparty/eigen/lapack/complex_double.cpp new file mode 100644 index 000000000..c9c575273 --- /dev/null +++ b/thirdparty/eigen/lapack/complex_double.cpp @@ -0,0 +1,18 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define SCALAR std::complex<double> +#define SCALAR_SUFFIX z +#define SCALAR_SUFFIX_UP "Z" +#define REAL_SCALAR_SUFFIX d +#define ISCOMPLEX 1 + +#include "cholesky.cpp" +#include "lu.cpp" +#include "svd.cpp" diff --git a/thirdparty/eigen/lapack/complex_single.cpp b/thirdparty/eigen/lapack/complex_single.cpp new file mode 100644 index 000000000..6d11b26cd --- /dev/null +++ b/thirdparty/eigen/lapack/complex_single.cpp @@ -0,0 +1,18 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define SCALAR std::complex<float> +#define SCALAR_SUFFIX c +#define SCALAR_SUFFIX_UP "C" +#define REAL_SCALAR_SUFFIX s +#define ISCOMPLEX 1 + +#include "cholesky.cpp" +#include "lu.cpp" +#include "svd.cpp" diff --git a/thirdparty/eigen/lapack/dladiv.f b/thirdparty/eigen/lapack/dladiv.f new file mode 100644 index 000000000..090a90654 --- /dev/null +++ b/thirdparty/eigen/lapack/dladiv.f @@ -0,0 +1,128 @@ +*> \brief \b DLADIV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLADIV performs complex division in real arithmetic +*> +*> a + i*b +*> p + i*q = --------- +*> c + i*d +*> +*> The algorithm is due to Robert L. Smith and can be found +*> in D.
Knuth, The art of Computer Programming, Vol.2, p.195 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION +*> The scalars a, b, c, and d in the above expression. +*> \endverbatim +*> +*> \param[out] P +*> \verbatim +*> P is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION +*> The scalars p and q in the above expression. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION E, F +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
+* + IF( ABS( D ).LT.ABS( C ) ) THEN + E = D / C + F = C + D*E + P = ( A+B*E ) / F + Q = ( B-A*E ) / F + ELSE + E = C / D + F = D + C*E + P = ( B+A*E ) / F + Q = ( -A+B*E ) / F + END IF +* + RETURN +* +* End of DLADIV +* + END diff --git a/thirdparty/eigen/lapack/dlamch.f b/thirdparty/eigen/lapack/dlamch.f new file mode 100644 index 000000000..eb307e5e1 --- /dev/null +++ b/thirdparty/eigen/lapack/dlamch.f @@ -0,0 +1,189 @@ +*> \brief \b DLAMCH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMCH determines double precision machine parameters. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CMACH +*> \verbatim +*> Specifies the value to be returned by DLAMCH: +*> = 'E' or 'e', DLAMCH := eps +*> = 'S' or 's', DLAMCH := sfmin +*> = 'B' or 'b', DLAMCH := base +*> = 'P' or 'p', DLAMCH := eps*base +*> = 'N' or 'n', DLAMCH := t +*> = 'R' or 'r', DLAMCH := rnd +*> = 'M' or 'm', DLAMCH := emin +*> = 'U' or 'u', DLAMCH := rmin +*> = 'L' or 'l', DLAMCH := emax +*> = 'O' or 'o', DLAMCH := rmax +*> where +*> eps = relative machine precision +*> sfmin = safe minimum, such that 1/sfmin does not overflow +*> base = base of the machine +*> prec = eps*base +*> t = number of (base) digits in the mantissa +*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +*> emin = minimum exponent before (gradual) underflow +*> rmin = underflow threshold - base**(emin-1) +*> emax = largest exponent before overflow +*> rmax = overflow threshold - (base**emax)*(1-eps) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd.
+* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION RND, EPS, SFMIN, SMALL, RMACH +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DIGITS, EPSILON, HUGE, MAXEXPONENT, + $ MINEXPONENT, RADIX, TINY +* .. +* .. Executable Statements .. +* +* +* Assume rounding, not chopping. Always. +* + RND = ONE +* + IF( ONE.EQ.RND ) THEN + EPS = EPSILON(ZERO) * 0.5 + ELSE + EPS = EPSILON(ZERO) + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + SFMIN = TINY(ZERO) + SMALL = ONE / HUGE(ZERO) + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. 
+* + SFMIN = SMALL*( ONE+EPS ) + END IF + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = EPS * RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = DIGITS(ZERO) + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = MINEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = tiny(zero) + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = MAXEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = HUGE(ZERO) + ELSE + RMACH = ZERO + END IF +* + DLAMCH = RMACH + RETURN +* +* End of DLAMCH +* + END +************************************************************************ +*> \brief \b DLAMC3 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC3 is intended to force A and B to be stored prior to doing +*> the addition of A and B , for use in situations where optimizers +*> might hold one of these in a register. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date November 2011 +*> \ingroup auxOTHERauxiliary +*> +*> \param[in] A +*> \verbatim +*> A is a DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is a DOUBLE PRECISION +*> The values A and B. +*> \endverbatim +*> + DOUBLE PRECISION FUNCTION DLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B +* .. +* ===================================================================== +* +* .. Executable Statements .. 
+* + DLAMC3 = A + B +* + RETURN +* +* End of DLAMC3 +* + END +* +************************************************************************ diff --git a/thirdparty/eigen/lapack/dlapy2.f b/thirdparty/eigen/lapack/dlapy2.f new file mode 100644 index 000000000..e6a62bf4a --- /dev/null +++ b/thirdparty/eigen/lapack/dlapy2.f @@ -0,0 +1,104 @@ +*> \brief \b DLAPY2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +*> overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> X and Y specify the values x and y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. 
+ DOUBLE PRECISION W, XABS, YABS, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + DLAPY2 = W + ELSE + DLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + RETURN +* +* End of DLAPY2 +* + END diff --git a/thirdparty/eigen/lapack/dlapy3.f b/thirdparty/eigen/lapack/dlapy3.f new file mode 100644 index 000000000..ae9844f80 --- /dev/null +++ b/thirdparty/eigen/lapack/dlapy3.f @@ -0,0 +1,111 @@ +*> \brief \b DLAPY3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPY3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION X, Y, Z +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +*> unnecessary overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION +*> X, Y and Z specify the values x, y and z. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y, Z +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN +* W can be zero for max(0,nan,0) +* adding all three entries together will make sure +* NaN will not disappear. + DLAPY3 = XABS + YABS + ZABS + ELSE + DLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of DLAPY3 +* + END diff --git a/thirdparty/eigen/lapack/dlarf.f b/thirdparty/eigen/lapack/dlarf.f new file mode 100644 index 000000000..2a82ff439 --- /dev/null +++ b/thirdparty/eigen/lapack/dlarf.f @@ -0,0 +1,227 @@ +*> \brief \b DLARF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARF applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. 
+*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILADLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILADLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + CALL DGEMV( 'Transpose', LASTV, LASTC, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) 
- v(1:lastv,1) * w(1:lastc,1)**T +* + CALL DGER( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL DGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T +* + CALL DGER( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of DLARF +* + END diff --git a/thirdparty/eigen/lapack/dlarfb.f b/thirdparty/eigen/lapack/dlarfb.f new file mode 100644 index 000000000..206d3b268 --- /dev/null +++ b/thirdparty/eigen/lapack/dlarfb.f @@ -0,0 +1,762 @@ +*> \brief \b DLARFB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFB applies a real block reflector H or its transpose H**T to a +*> real m by n matrix C, from either the left or the right. 
+*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'T': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The triangular k by k matrix T in the representation of the +*> block reflector. 
+*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. 
+*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J, LASTV, LASTC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILADLR( M, K, V, LDV ) ) + LASTC = ILADLC( LASTV, N, C, LDC ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C1**T +* + DO 10 J = 1, K + CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**T *V2 +* + CALL DGEMM( 'Transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2 * W**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 30 J = 1, K + DO 20 I = 1, LASTC + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILADLR( N, K, V, LDV ) ) + LASTC = ILADLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, 
ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILADLR( M, K, V, LDV ) ) + LASTC = ILADLC( LASTV, N, C, LDC ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C2**T +* + DO 70 J = 1, K + CALL DCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**T*V1 +* + CALL DGEMM( 'Transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1 * W**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 90 J = 1, K + DO 80 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - WORK(I, J) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILADLR( 
N, K, V, LDV ) ) + LASTC = ILADLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL DCOPY( LASTC, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) - WORK(I, J) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILADLC( K, M, V, LDV ) ) + LASTC = ILADLC( LASTV, N, C, LDC ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C1**T +* + DO 130 J = 1, K + CALL DCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**T*V2**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2**T * W**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 150 J = 1, K + DO 140 I = 1, LASTC + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILADLC( K, N, V, LDV ) ) + LASTC = ILADLR( M, LASTV, C, LDC ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL DCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Upper', 
TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILADLC( K, M, V, LDV ) ) + LASTC = ILADLC( LASTV, N, C, LDC ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C2**T +* + DO 190 J = 1, K + CALL DCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**T * V1**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1**T * W**T +* + CALL DGEMM( 'Transpose', 'Transpose', + $ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 210 J = 1, K + DO 200 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - WORK(I, J) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C 
* H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILADLC( K, N, V, LDV ) ) + LASTC = ILADLR( M, LASTV, C, LDC ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL DCOPY( LASTC, C( 1, LASTV-K+J ), 1, + $ WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1**T +* + CALL DGEMM( 'No transpose', 'Transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 240 J = 1, K + DO 230 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) - WORK(I, J) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of DLARFB +* + END diff --git a/thirdparty/eigen/lapack/dlarfg.f b/thirdparty/eigen/lapack/dlarfg.f new file mode 100644 index 000000000..458ad2e05 --- /dev/null +++ b/thirdparty/eigen/lapack/dlarfg.f @@ -0,0 +1,196 @@ +*> \brief \b DLARFG +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ) +* .. 
+* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFG generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, and x is an (n-1)-element real +*> vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> +*> Otherwise 1 <= tau <= 2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION ALPHA, TAU +* .. +* .. 
Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 + EXTERNAL DLAMCH, DLAPY2, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + 10 CONTINUE + KNT = KNT + 1 + CALL DSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DNRM2( N-1, X, INCX ) + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + TAU = ( BETA-ALPHA ) / BETA + CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of DLARFG +* + END diff --git a/thirdparty/eigen/lapack/dlarft.f b/thirdparty/eigen/lapack/dlarft.f new file mode 100644 index 000000000..4b7550403 --- /dev/null +++ b/thirdparty/eigen/lapack/dlarft.f @@ -0,0 +1,326 @@ +*> \brief \b DLARFT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* 
Definition: +* =========== +* +* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. 
+*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. 
+ DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. 
+ DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/thirdparty/eigen/lapack/double.cpp b/thirdparty/eigen/lapack/double.cpp new file mode 100644 index 000000000..ea78bb662 --- /dev/null +++ b/thirdparty/eigen/lapack/double.cpp @@ -0,0 +1,18 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define SCALAR double +#define SCALAR_SUFFIX d +#define SCALAR_SUFFIX_UP "D" +#define ISCOMPLEX 0 + +#include "cholesky.cpp" +#include "lu.cpp" +#include "eigenvalues.cpp" +#include "svd.cpp" diff --git a/thirdparty/eigen/lapack/dsecnd_NONE.f b/thirdparty/eigen/lapack/dsecnd_NONE.f new file mode 100644 index 000000000..61a8dff13 --- /dev/null +++ b/thirdparty/eigen/lapack/dsecnd_NONE.f @@ -0,0 +1,52 @@ +*> \brief \b DSECND returns nothing +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DSECND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSECND returns nothing instead of returning the user time for a process in seconds. +*> If you are using that routine, it means that neither EXTERNAL ETIME, +*> EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on +*> your machine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* ===================================================================== +* + DSECND = 0.0D+0 + RETURN +* +* End of DSECND +* + END diff --git a/thirdparty/eigen/lapack/eigenvalues.cpp b/thirdparty/eigen/lapack/eigenvalues.cpp new file mode 100644 index 000000000..921c51569 --- /dev/null +++ b/thirdparty/eigen/lapack/eigenvalues.cpp @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "lapack_common.h" +#include + +// computes eigen values and vectors of a general N-by-N matrix A +EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info)) +{ + // TODO exploit the work buffer + bool query_size = *lwork==-1; + + *info = 0; + if(*jobz!='N' && *jobz!='V') *info = -1; + else if(UPLO(*uplo)==INVALID) *info = -2; + else if(*n<0) *info = -3; + else if(*lda eig(mat,computeVectors?ComputeEigenvectors:EigenvaluesOnly); + + if(eig.info()==NoConvergence) + { + make_vector(w,*n).setZero(); + if(computeVectors) + matrix(a,*n,*n,*lda).setIdentity(); + //*info = 1; + return 0; + } + + make_vector(w,*n) = eig.eigenvalues(); + if(computeVectors) + matrix(a,*n,*n,*lda) = eig.eigenvectors(); + + return 0; +} diff --git a/thirdparty/eigen/lapack/ilaclc.f b/thirdparty/eigen/lapack/ilaclc.f new file mode 100644 index 000000000..4ceb61c52 --- /dev/null +++ b/thirdparty/eigen/lapack/ilaclc.f @@ -0,0 +1,118 @@ +*> \brief \b ILACLC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILACLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILACLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILACLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. 
+*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILACLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = (0.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILACLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILACLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. 
+ DO ILACLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILACLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/ilaclr.f b/thirdparty/eigen/lapack/ilaclr.f new file mode 100644 index 000000000..d8ab09c55 --- /dev/null +++ b/thirdparty/eigen/lapack/ilaclr.f @@ -0,0 +1,121 @@ +*> \brief \b ILACLR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILACLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILACLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILACLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILACLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. 
+* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = (0.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILACLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILACLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILACLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILACLR = MAX( ILACLR, I ) + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/iladlc.f b/thirdparty/eigen/lapack/iladlc.f new file mode 100644 index 000000000..f84bd833a --- /dev/null +++ b/thirdparty/eigen/lapack/iladlc.f @@ -0,0 +1,118 @@ +*> \brief \b ILADLC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILADLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). 
+*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILADLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILADLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILADLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILADLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILADLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/iladlr.f b/thirdparty/eigen/lapack/iladlr.f new file mode 100644 index 000000000..2114c6164 --- /dev/null +++ b/thirdparty/eigen/lapack/iladlr.f @@ -0,0 +1,121 @@ +*> \brief \b ILADLR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. 
+* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILADLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILADLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILADLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILADLR = M + ELSE +* Scan up each column tracking the last zero row seen. 
+ ILADLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILADLR = MAX( ILADLR, I ) + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/ilaslc.f b/thirdparty/eigen/lapack/ilaslc.f new file mode 100644 index 000000000..e3db0f4ae --- /dev/null +++ b/thirdparty/eigen/lapack/ilaslc.f @@ -0,0 +1,118 @@ +*> \brief \b ILASLC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILASLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILASLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILASLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILASLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILASLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILASLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILASLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILASLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/ilaslr.f b/thirdparty/eigen/lapack/ilaslr.f new file mode 100644 index 000000000..48b73f44d --- /dev/null +++ b/thirdparty/eigen/lapack/ilaslr.f @@ -0,0 +1,121 @@ +*> \brief \b ILASLR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILASLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILASLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILASLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). 
+*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILASLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILASLR = M + ELSEIF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILASLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILASLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILASLR = MAX( ILASLR, I ) + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/ilazlc.f b/thirdparty/eigen/lapack/ilazlc.f new file mode 100644 index 000000000..15b149022 --- /dev/null +++ b/thirdparty/eigen/lapack/ilazlc.f @@ -0,0 +1,118 @@ +*> \brief \b ILAZLC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAZLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAZLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. 
+* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAZLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILAZLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILAZLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILAZLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. 
+ DO ILAZLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILAZLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/ilazlr.f b/thirdparty/eigen/lapack/ilazlr.f new file mode 100644 index 000000000..b2ab943ca --- /dev/null +++ b/thirdparty/eigen/lapack/ilazlr.f @@ -0,0 +1,121 @@ +*> \brief \b ILAZLR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAZLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAZLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAZLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILAZLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. 
+ INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILAZLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILAZLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILAZLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILAZLR = MAX( ILAZLR, I ) + END DO + END IF + RETURN + END diff --git a/thirdparty/eigen/lapack/lapack_common.h b/thirdparty/eigen/lapack/lapack_common.h new file mode 100644 index 000000000..c872a813e --- /dev/null +++ b/thirdparty/eigen/lapack/lapack_common.h @@ -0,0 +1,29 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_LAPACK_COMMON_H +#define EIGEN_LAPACK_COMMON_H + +#include "../blas/common.h" +#include "../Eigen/src/misc/lapack.h" + +#define EIGEN_LAPACK_FUNC(FUNC,ARGLIST) \ + extern "C" { int EIGEN_BLAS_FUNC(FUNC) ARGLIST; } \ + int EIGEN_BLAS_FUNC(FUNC) ARGLIST + +typedef Eigen::Map > PivotsType; + +#if ISCOMPLEX +#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X, +#else +#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) +#endif + + +#endif // EIGEN_LAPACK_COMMON_H diff --git a/thirdparty/eigen/lapack/lu.cpp b/thirdparty/eigen/lapack/lu.cpp new file mode 100644 index 000000000..90cebe0f4 --- /dev/null +++ b/thirdparty/eigen/lapack/lu.cpp @@ -0,0 +1,89 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "common.h" +#include + +// computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges +EIGEN_LAPACK_FUNC(getrf,(int *m, int *n, RealScalar *pa, int *lda, int *ipiv, int *info)) +{ + *info = 0; + if(*m<0) *info = -1; + else if(*n<0) *info = -2; + else if(*lda(pa); + int nb_transpositions; + int ret = int(Eigen::internal::partial_lu_impl + ::blocked_lu(*m, *n, a, *lda, ipiv, nb_transpositions)); + + for(int i=0; i=0) + *info = ret+1; + + return 0; +} + +//GETRS solves a system of linear equations +// A * X = B or A' * X = B +// with a general N-by-N matrix A using the LU factorization computed by GETRF +EIGEN_LAPACK_FUNC(getrs,(char *trans, int *n, int *nrhs, RealScalar *pa, int *lda, int *ipiv, RealScalar *pb, int *ldb, int *info)) +{ + *info = 0; + if(OP(*trans)==INVALID) *info = -1; + else if(*n<0) *info = -2; + else if(*nrhs<0) *info = -3; + else if(*lda(pa); + Scalar* b = reinterpret_cast(pb); + MatrixType lu(a,*n,*n,*lda); + 
MatrixType B(b,*n,*nrhs,*ldb); + + for(int i=0; i<*n; ++i) + ipiv[i]--; + if(OP(*trans)==NOTR) + { + B = PivotsType(ipiv,*n) * B; + lu.triangularView().solveInPlace(B); + lu.triangularView().solveInPlace(B); + } + else if(OP(*trans)==TR) + { + lu.triangularView().transpose().solveInPlace(B); + lu.triangularView().transpose().solveInPlace(B); + B = PivotsType(ipiv,*n).transpose() * B; + } + else if(OP(*trans)==ADJ) + { + lu.triangularView().adjoint().solveInPlace(B); + lu.triangularView().adjoint().solveInPlace(B); + B = PivotsType(ipiv,*n).transpose() * B; + } + for(int i=0; i<*n; ++i) + ipiv[i]++; + + return 0; +} diff --git a/thirdparty/eigen/lapack/second_NONE.f b/thirdparty/eigen/lapack/second_NONE.f new file mode 100644 index 000000000..d3e6d3319 --- /dev/null +++ b/thirdparty/eigen/lapack/second_NONE.f @@ -0,0 +1,52 @@ +*> \brief \b SECOND returns nothing +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SECOND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SECOND returns nothing instead of returning the user time for a process in seconds. +*> If you are using that routine, it means that neither EXTERNAL ETIME, +*> EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on +*> your machine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* ===================================================================== +* + SECOND = 0.0E+0 + RETURN +* +* End of SECOND +* + END diff --git a/thirdparty/eigen/lapack/single.cpp b/thirdparty/eigen/lapack/single.cpp new file mode 100644 index 000000000..c7da3effa --- /dev/null +++ b/thirdparty/eigen/lapack/single.cpp @@ -0,0 +1,18 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define SCALAR float +#define SCALAR_SUFFIX s +#define SCALAR_SUFFIX_UP "S" +#define ISCOMPLEX 0 + +#include "cholesky.cpp" +#include "lu.cpp" +#include "eigenvalues.cpp" +#include "svd.cpp" diff --git a/thirdparty/eigen/lapack/sladiv.f b/thirdparty/eigen/lapack/sladiv.f new file mode 100644 index 000000000..da3afa36b --- /dev/null +++ b/thirdparty/eigen/lapack/sladiv.f @@ -0,0 +1,128 @@ +*> \brief \b SLADIV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLADIV( A, B, C, D, P, Q ) +* +* .. Scalar Arguments .. +* REAL A, B, C, D, P, Q +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLADIV performs complex division in real arithmetic +*> +*> a + i*b +*> p + i*q = --------- +*> c + i*d +*> +*> The algorithm is due to Robert L. Smith and can be found +*> in D. 
Knuth, The art of Computer Programming, Vol.2, p.195 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is REAL +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL +*> The scalars a, b, c, and d in the above expression. +*> \endverbatim +*> +*> \param[out] P +*> \verbatim +*> P is REAL +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL +*> The scalars p and q in the above expression. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + REAL A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL E, F +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
+* + IF( ABS( D ).LT.ABS( C ) ) THEN + E = D / C + F = C + D*E + P = ( A+B*E ) / F + Q = ( B-A*E ) / F + ELSE + E = C / D + F = D + C*E + P = ( B+A*E ) / F + Q = ( -A+B*E ) / F + END IF +* + RETURN +* +* End of SLADIV +* + END diff --git a/thirdparty/eigen/lapack/slamch.f b/thirdparty/eigen/lapack/slamch.f new file mode 100644 index 000000000..4bffad0eb --- /dev/null +++ b/thirdparty/eigen/lapack/slamch.f @@ -0,0 +1,192 @@ +*> \brief \b SLAMCH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SLAMCH( CMACH ) +* +* .. Scalar Arguments .. +* CHARACTER CMACH +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAMCH determines single precision machine parameters. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CMACH +*> \verbatim +*> Specifies the value to be returned by SLAMCH: +*> = 'E' or 'e', SLAMCH := eps +*> = 'S' or 's , SLAMCH := sfmin +*> = 'B' or 'b', SLAMCH := base +*> = 'P' or 'p', SLAMCH := eps*base +*> = 'N' or 'n', SLAMCH := t +*> = 'R' or 'r', SLAMCH := rnd +*> = 'M' or 'm', SLAMCH := emin +*> = 'U' or 'u', SLAMCH := rmin +*> = 'L' or 'l', SLAMCH := emax +*> = 'O' or 'o', SLAMCH := rmax +*> where +*> eps = relative machine precision +*> sfmin = safe minimum, such that 1/sfmin does not overflow +*> base = base of the machine +*> prec = eps*base +*> t = number of (base) digits in the mantissa +*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +*> emin = minimum exponent before (gradual) underflow +*> rmin = underflow threshold - base**(emin-1) +*> emax = largest exponent before overflow +*> rmax = overflow threshold - (base**emax)*(1-eps) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. 
+* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL RND, EPS, SFMIN, SMALL, RMACH +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DIGITS, EPSILON, HUGE, MAXEXPONENT, + $ MINEXPONENT, RADIX, TINY +* .. +* .. Executable Statements .. +* +* +* Assume rounding, not chopping. Always. +* + RND = ONE +* + IF( ONE.EQ.RND ) THEN + EPS = EPSILON(ZERO) * 0.5 + ELSE + EPS = EPSILON(ZERO) + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + SFMIN = TINY(ZERO) + SMALL = ONE / HUGE(ZERO) + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. 
+* + SFMIN = SMALL*( ONE+EPS ) + END IF + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = EPS * RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = DIGITS(ZERO) + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = MINEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = tiny(zero) + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = MAXEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = HUGE(ZERO) + ELSE + RMACH = ZERO + END IF +* + SLAMCH = RMACH + RETURN +* +* End of SLAMCH +* + END +************************************************************************ +*> \brief \b SLAMC3 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC3 is intended to force A and B to be stored prior to doing +*> the addition of A and B , for use in situations where optimizers +*> might hold one of these in a register. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date November 2011 +*> \ingroup auxOTHERauxiliary +*> +*> \param[in] A +*> \verbatim +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> The values A and B. +*> \endverbatim +*> +* + REAL FUNCTION SLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + REAL A, B +* .. +* ===================================================================== +* +* .. Executable Statements .. 
+* + SLAMC3 = A + B +* + RETURN +* +* End of SLAMC3 +* + END +* +************************************************************************ diff --git a/thirdparty/eigen/lapack/slapy2.f b/thirdparty/eigen/lapack/slapy2.f new file mode 100644 index 000000000..1f6b1ca4f --- /dev/null +++ b/thirdparty/eigen/lapack/slapy2.f @@ -0,0 +1,104 @@ +*> \brief \b SLAPY2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLAPY2( X, Y ) +* +* .. Scalar Arguments .. +* REAL X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +*> overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is REAL +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is REAL +*> X and Y specify the values x and y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + REAL X, Y +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, Z +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. 
Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + SLAPY2 = W + ELSE + SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + RETURN +* +* End of SLAPY2 +* + END diff --git a/thirdparty/eigen/lapack/slapy3.f b/thirdparty/eigen/lapack/slapy3.f new file mode 100644 index 000000000..aa2f5bfc4 --- /dev/null +++ b/thirdparty/eigen/lapack/slapy3.f @@ -0,0 +1,111 @@ +*> \brief \b SLAPY3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPY3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLAPY3( X, Y, Z ) +* +* .. Scalar Arguments .. +* REAL X, Y, Z +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +*> unnecessary overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is REAL +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is REAL +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL +*> X, Y and Z specify the values x, y and z. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + REAL X, Y, Z +* .. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN +* W can be zero for max(0,nan,0) +* adding all three entries together will make sure +* NaN will not disappear. + SLAPY3 = XABS + YABS + ZABS + ELSE + SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of SLAPY3 +* + END diff --git a/thirdparty/eigen/lapack/slarf.f b/thirdparty/eigen/lapack/slarf.f new file mode 100644 index 000000000..8a8ff308e --- /dev/null +++ b/thirdparty/eigen/lapack/slarf.f @@ -0,0 +1,227 @@ +*> \brief \b SLARF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARF applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. 
+*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. 
+ REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILASLR, ILASLC + EXTERNAL LSAME, ILASLR, ILASLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILASLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILASLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + CALL SGEMV( 'Transpose', LASTV, LASTC, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T +* + CALL SGER( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL SGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) 
- w(1:lastc,1) * v(1:lastv,1)**T +* + CALL SGER( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of SLARF +* + END diff --git a/thirdparty/eigen/lapack/slarfb.f b/thirdparty/eigen/lapack/slarfb.f new file mode 100644 index 000000000..eb95990b3 --- /dev/null +++ b/thirdparty/eigen/lapack/slarfb.f @@ -0,0 +1,763 @@ +*> \brief \b SLARFB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFB applies a real block reflector H or its transpose H**T to a +*> real m by n matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'T': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . 
H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The triangular k by k matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. 
+*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J, LASTV, LASTC +* .. +* .. External Functions .. 
+ LOGICAL LSAME + INTEGER ILASLR, ILASLC + EXTERNAL LSAME, ILASLR, ILASLC +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILASLR( M, K, V, LDV ) ) + LASTC = ILASLC( LASTV, N, C, LDC ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C1**T +* + DO 10 J = 1, K + CALL SCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**T *V2 +* + CALL SGEMM( 'Transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2 * W**T +* + CALL SGEMM( 'No transpose', 'Transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 30 J = 1, K + DO 20 I = 1, LASTC + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILASLR( N, K, V, LDV ) ) + LASTC = ILASLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 
J = 1, K + CALL SCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2**T +* + CALL SGEMM( 'No transpose', 'Transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILASLR( M, K, V, LDV ) ) + LASTC = ILASLC( LASTV, N, C, LDC ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C2**T +* + DO 70 J = 1, K + CALL SCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**T*V1 +* + CALL SGEMM( 'Transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1 * W**T +* + CALL SGEMM( 'No transpose', 'Transpose', + $ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 90 J = 1, K + DO 80 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - WORK(I, J) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILASLR( N, K, V, LDV ) ) + LASTC = ILASLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL SCOPY( LASTC, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Lower', TRANS, 
'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1**T +* + CALL SGEMM( 'No transpose', 'Transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) - WORK(I, J) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILASLC( K, M, V, LDV ) ) + LASTC = ILASLC( LASTV, N, C, LDC ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C1**T +* + DO 130 J = 1, K + CALL SCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**T*V2**T +* + CALL SGEMM( 'Transpose', 'Transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2**T * W**T +* + CALL SGEMM( 'Transpose', 'Transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 150 J = 1, K + DO 140 I = 1, LASTC + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE 
+* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILASLC( K, N, V, LDV ) ) + LASTC = ILASLR( M, LASTV, C, LDC ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL SCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2**T +* + CALL SGEMM( 'No transpose', 'Transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILASLC( K, M, V, LDV ) ) + LASTC = ILASLC( LASTV, N, C, LDC ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C2**T +* + DO 190 J = 1, K + CALL SCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**T * V1**T +* + CALL SGEMM( 'Transpose', 'Transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1**T * W**T +* + CALL SGEMM( 'Transpose', 'Transpose', + $ LASTV-K, LASTC, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 210 J = 1, K + DO 200 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - WORK(I, J) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* + LASTV = MAX( K, ILASLC( K, N, V, LDV ) ) + LASTC = ILASLR( M, LASTV, C, LDC ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL SCOPY( LASTC, C( 1, LASTV-K+J ), 1, + $ WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1**T +* + CALL SGEMM( 'No transpose', 'Transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL 
STRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 240 J = 1, K + DO 230 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) + $ - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of SLARFB +* + END diff --git a/thirdparty/eigen/lapack/slarfg.f b/thirdparty/eigen/lapack/slarfg.f new file mode 100644 index 000000000..4f10ffcaf --- /dev/null +++ b/thirdparty/eigen/lapack/slarfg.f @@ -0,0 +1,196 @@ +*> \brief \b SLARFG +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL ALPHA, TAU +* .. +* .. Array Arguments .. +* REAL X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFG generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, and x is an (n-1)-element real +*> vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> +*> Otherwise 1 <= tau <= 2. 
+*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. If N <= 1, TAU is set to zero and the routine returns at once. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL ALPHA, TAU +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2, SNRM2 + EXTERNAL SLAMCH, SLAPY2, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. 
+* Quick return: with N <= 1 there is no x to annihilate, so TAU = 0 (H = I) + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* x is already zero: H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + 10 CONTINUE + KNT = KNT + 1 + CALL SSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SNRM2( N-1, X, INCX ) + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + TAU = ( BETA-ALPHA ) / BETA + CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy; undo the KNT scaling passes on BETA before returning it in ALPHA +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of SLARFG +* + END diff --git a/thirdparty/eigen/lapack/slarft.f b/thirdparty/eigen/lapack/slarft.f new file mode 100644 index 000000000..30b0668e4 --- /dev/null +++ b/thirdparty/eigen/lapack/slarft.f @@ -0,0 +1,326 @@ +*> \brief \b SLARFT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. 
+* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . 
H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. If N = 0 the routine returns without touching T. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. 
+*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. 
External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I, so T(1:i,i) is set to zero +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros in column i of V. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros in row i of V. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I, so T(i:k,i) is set to zero +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros in column i of V. 
+ DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros in row i of V. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL SGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of SLARFT +* + END diff --git a/thirdparty/eigen/lapack/svd.cpp b/thirdparty/eigen/lapack/svd.cpp new file mode 100644 index 000000000..77b302b6b --- /dev/null +++ b/thirdparty/eigen/lapack/svd.cpp @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "lapack_common.h" +#include + +// computes the singular values/vectors of a general M-by-N matrix A using divide-and-conquer. NOTE(review): the bare #include above and the *lda check below (unbalanced parentheses) appear to have lost text -- TODO confirm against upstream Eigen lapack/svd.cpp +EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork, + EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info)) +{ + // TODO exploit the work buffer + bool query_size = *lwork==-1; + int diag_size = (std::min)(*m,*n); + + *info = 0; + if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N') *info = -1; + else if(*m<0) *info = -2; + else if(*n<0) *info = -3; + else if(*lda=*n && *ldvt<*n)) *info = -10; + + if(*info!=0) + { + int e = -*info; + return xerbla_(SCALAR_SUFFIX_UP"GESDD ", &e, 6); + } + + if(query_size) + { + *lwork = 0; + return 0; + } + + if(*n==0 || *m==0) + return 0; + + PlainMatrixType mat(*m,*n); + mat = matrix(a,*m,*n,*lda); + + int option = *jobz=='A' ? ComputeFullU|ComputeFullV + : *jobz=='S' ? ComputeThinU|ComputeThinV + : *jobz=='O' ? 
ComputeThinU|ComputeThinV + : 0; + + BDCSVD svd(mat,option); + + make_vector(s,diag_size) = svd.singularValues().head(diag_size); + + if(*jobz=='A') + { + matrix(u,*m,*m,*ldu) = svd.matrixU(); + matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + } + else if(*jobz=='S') + { + matrix(u,*m,diag_size,*ldu) = svd.matrixU(); + matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint(); + } + else if(*jobz=='O' && *m>=*n) + { + matrix(a,*m,*n,*lda) = svd.matrixU(); + matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + } + else if(*jobz=='O') + { + matrix(u,*m,*m,*ldu) = svd.matrixU(); + matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); + } + + return 0; +} + +// computes the singular values/vectors of a general M-by-N matrix A using the two-sided Jacobi algorithm. NOTE(review): the *lda check below runs straight into "svd(mat,option)", so the remaining checks and the solver declaration appear to be missing -- TODO confirm against upstream Eigen lapack/svd.cpp +EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork, + EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info)) +{ + // TODO exploit the work buffer + bool query_size = *lwork==-1; + int diag_size = (std::min)(*m,*n); + + *info = 0; + if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1; + else if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N') + || (*jobu=='O' && *jobv=='O')) *info = -2; + else if(*m<0) *info = -3; + else if(*n<0) *info = -4; + else if(*lda svd(mat,option); + + make_vector(s,diag_size) = svd.singularValues().head(diag_size); + { + if(*jobu=='A') matrix(u,*m,*m,*ldu) = svd.matrixU(); + else if(*jobu=='S') matrix(u,*m,diag_size,*ldu) = svd.matrixU(); + else if(*jobu=='O') matrix(a,*m,diag_size,*lda) = svd.matrixU(); + } + { + if(*jobv=='A') matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + else if(*jobv=='S') matrix(vt,diag_size,*n,*ldvt) = 
svd.matrixV().adjoint(); + else if(*jobv=='O') matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); + } + return 0; +} diff --git a/thirdparty/eigen/lapack/zlacgv.f b/thirdparty/eigen/lapack/zlacgv.f new file 
mode 100644 index 000000000..16c2e2ed9 --- /dev/null +++ b/thirdparty/eigen/lapack/zlacgv.f @@ -0,0 +1,116 @@ +*> \brief \b ZLACGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACGV( N, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACGV conjugates a complex vector of length N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vector X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-1)*abs(INCX)) +*> On entry, the vector of length N to be conjugated. +*> On exit, X is overwritten with conjg(X). +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive elements of X. If INCX is negative, the walk starts at element 1-(N-1)*INCX. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. 
+ INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. 
+ INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* Unit stride is conjugated directly; otherwise X is walked with step INCX starting at IOFF + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = DCONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = DCONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of ZLACGV +* + END diff --git a/thirdparty/eigen/lapack/zladiv.f b/thirdparty/eigen/lapack/zladiv.f new file mode 100644 index 000000000..aa71db14a --- /dev/null +++ b/thirdparty/eigen/lapack/zladiv.f @@ -0,0 +1,97 @@ +*> \brief \b ZLADIV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* COMPLEX*16 FUNCTION ZLADIV( X, Y ) +* +* .. Scalar Arguments .. +* COMPLEX*16 X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLADIV := X / Y, where X and Y are complex. The computation of X / Y +*> will not overflow on an intermediate step unless the result +*> overflows. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX*16 +*> The complex scalars X (numerator) and Y (denominator). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + COMPLEX*16 FUNCTION ZLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 X, Y +* .. 
+* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL DLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* Pass the real and imaginary parts of X and Y to DLADIV, then rebuild the complex result from ZR and ZI + CALL DLADIV( DBLE( X ), DIMAG( X ), DBLE( Y ), DIMAG( Y ), ZR, + $ ZI ) + ZLADIV = DCMPLX( ZR, ZI ) +* + RETURN +* +* End of ZLADIV +* + END diff --git a/thirdparty/eigen/lapack/zlarf.f b/thirdparty/eigen/lapack/zlarf.f new file mode 100644 index 000000000..53f314d64 --- /dev/null +++ b/thirdparty/eigen/lapack/zlarf.f @@ -0,0 +1,232 @@ +*> \brief \b ZLARF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARF applies a complex elementary reflector H to a complex M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H, supply conjg(tau) instead of +*> tau. 
+*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. 
+*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. If TAU = 0, C is left unchanged. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. 
+ COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. 
+ LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. Executable Statements .. +* LASTV stays 0 when TAU = 0, which skips both updates further down + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +* Set up variables for scanning V. LASTV begins pointing to the end +* of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +* Look for the last non-zero element of v. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +* Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILAZLC(LASTV, N, C, LDC) + ELSE +* Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILAZLR(M, LASTV, C, LDC) + END IF + END IF +* Note that lastc.eq.0 renders the BLAS operations null; no special +* case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) +* + CALL ZGEMV( 'Conjugate transpose', LASTV, LASTC, ONE, + $ C, LDC, V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**H +* + CALL ZGERC( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL ZGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) 
- w(1:lastc,1) * v(1:lastv,1)**H +* + CALL ZGERC( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of ZLARF +* + END diff --git a/thirdparty/eigen/lapack/zlarfb.f b/thirdparty/eigen/lapack/zlarfb.f new file mode 100644 index 000000000..30fc4b940 --- /dev/null +++ b/thirdparty/eigen/lapack/zlarfb.f @@ -0,0 +1,774 @@ +*> \brief \b ZLARFB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFB applies a complex block reflector H or its conjugate transpose H**H to a +*> complex M-by-N matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . 
H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. 
+*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J, LASTV, LASTC +* .. +* .. 
External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZLACGV, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILAZLR( M, K, V, LDV ) ) + LASTC = ILAZLC( LASTV, N, C, LDC ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C1**H +* + DO 10 J = 1, K + CALL ZCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**H *V2 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 30 J = 1, K + DO 20 I = 1, LASTC + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* 
+ LASTV = MAX( K, ILAZLR( N, K, V, LDV ) ) + LASTC = ILAZLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL ZCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILAZLR( M, K, V, LDV ) ) + LASTC = ILAZLC( LASTV, N, C, LDC ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C2**H +* + DO 70 J = 1, K + CALL ZCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**H*V1 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1 * W**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 90 J = 1, K + DO 80 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILAZLR( N, K, V, LDV ) ) + LASTC = ILAZLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL ZCOPY( LASTC, C( 1, LASTV-K+J ), 1, + $ WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C, LDC, V, LDV, ONE, 
WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( LASTV-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) + $ - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILAZLC( K, M, V, LDV ) ) + LASTC = ILAZLC( LASTV, N, C, LDC ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C1**H +* + DO 130 J = 1, K + CALL ZCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**H*V2**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2**H * W**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTV-K, LASTC, K, + $ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 
'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 150 J = 1, K + DO 140 I = 1, LASTC + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILAZLC( K, N, V, LDV ) ) + LASTC = ILAZLR( M, LASTV, C, LDC ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL ZCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, K, LASTV-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. 
+* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILAZLC( K, M, V, LDV ) ) + LASTC = ILAZLC( LASTV, N, C, LDC ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C2**H +* + DO 190 J = 1, K + CALL ZCOPY( LASTC, C( LASTV-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1**H * V1**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTC, K, LASTV-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - V1**H * W**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTV-K, LASTC, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 210 J = 1, K + DO 200 I = 1, LASTC + C( LASTV-K+J, I ) = C( LASTV-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILAZLC( K, N, V, LDV ) ) + LASTC = ILAZLR( M, LASTV, C, LDC ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL ZCOPY( LASTC, C( 1, LASTV-K+J ), 1, + $ WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C1 * V1**H +* + CALL ZGEMM( 'No transpose', 'Conjugate 
transpose', + $ LASTC, K, LASTV-K, ONE, C, LDC, V, LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, LASTV-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, LASTC + C( I, LASTV-K+J ) = C( I, LASTV-K+J ) + $ - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of ZLARFB +* + END diff --git a/thirdparty/eigen/lapack/zlarfg.f b/thirdparty/eigen/lapack/zlarfg.f new file mode 100644 index 000000000..a90ae9f74 --- /dev/null +++ b/thirdparty/eigen/lapack/zlarfg.f @@ -0,0 +1,203 @@ +*> \brief \b ZLARFG +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFG generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, with beta real, and x is an +*> (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. 
+*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> +*> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. 
+ DOUBLE PRECISION DLAMCH, DLAPY3, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHR = DBLE( ALPHA ) + ALPHI = DIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL ZDSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHA = DCMPLX( ALPHR, ALPHI ) + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) + CALL ZSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of ZLARFG +* + END diff --git a/thirdparty/eigen/lapack/zlarft.f b/thirdparty/eigen/lapack/zlarft.f new file mode 100644 index 000000000..6a6151fd0 --- /dev/null +++ b/thirdparty/eigen/lapack/zlarft.f @@ -0,0 +1,327 @@ +*> \brief \b ZLARFT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, 
LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. 
+*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.4.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. 
+ CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZLACGV, ZTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. 
+ DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. 
+ DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/thirdparty/eigen/scripts/CMakeLists.txt b/thirdparty/eigen/scripts/CMakeLists.txt new file mode 100644 index 000000000..0d9a631a9 --- /dev/null +++ b/thirdparty/eigen/scripts/CMakeLists.txt @@ -0,0 +1,6 @@ +get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) +configure_file(buildtests.in ${CMAKE_BINARY_DIR}/buildtests.sh @ONLY) + +configure_file(check.in ${CMAKE_BINARY_DIR}/check.sh COPYONLY) +configure_file(debug.in ${CMAKE_BINARY_DIR}/debug.sh COPYONLY) +configure_file(release.in ${CMAKE_BINARY_DIR}/release.sh COPYONLY) diff --git a/thirdparty/eigen/scripts/buildtests.in b/thirdparty/eigen/scripts/buildtests.in new file mode 100755 index 000000000..526d5b74b --- /dev/null +++ b/thirdparty/eigen/scripts/buildtests.in @@ -0,0 +1,22 @@ +#!/bin/bash + +if [[ $# != 1 || $1 == *help ]] +then + echo "usage: $0 regexp" + echo " Builds tests matching the regexp." + echo " The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'." 
+ echo " For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'" + exit 0 +fi + +TESTSLIST="@EIGEN_TESTS_LIST@" +targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo` + +if [ -n "${EIGEN_MAKE_ARGS:+x}" ] +then + @CMAKE_MAKE_PROGRAM@ $targets_to_make ${EIGEN_MAKE_ARGS} +else + @CMAKE_MAKE_PROGRAM@ $targets_to_make @EIGEN_TEST_BUILD_FLAGS@ +fi +exit $? + diff --git a/thirdparty/eigen/scripts/cdashtesting.cmake.in b/thirdparty/eigen/scripts/cdashtesting.cmake.in new file mode 100644 index 000000000..59cf53328 --- /dev/null +++ b/thirdparty/eigen/scripts/cdashtesting.cmake.in @@ -0,0 +1,49 @@ + +set(CTEST_SOURCE_DIRECTORY "@CMAKE_SOURCE_DIR@") +set(CTEST_BINARY_DIRECTORY "@CMAKE_BINARY_DIR@") +set(CTEST_CMAKE_GENERATOR "@CMAKE_GENERATOR@") +set(CTEST_BUILD_NAME "@BUILDNAME@") +set(CTEST_SITE "@SITE@") + +set(MODEL Experimental) +if(${CTEST_SCRIPT_ARG} MATCHES Nightly) + set(MODEL Nightly) +elseif(${CTEST_SCRIPT_ARG} MATCHES Continuous) + set(MODEL Continuous) +endif() + +find_program(CTEST_HG_COMMAND NAMES hg) +set(CTEST_UPDATE_COMMAND "${CTEST_HG_COMMAND}") + +ctest_start(${MODEL} ${CTEST_SOURCE_DIRECTORY} ${CTEST_BINARY_DIRECTORY}) + +ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}") +ctest_submit(PARTS Update Notes) + +# to get CTEST_PROJECT_SUBPROJECTS definition: +include("${CTEST_SOURCE_DIRECTORY}/CTestConfig.cmake") + +foreach(subproject ${CTEST_PROJECT_SUBPROJECTS}) + message("") + message("Process ${subproject}") + + set_property(GLOBAL PROPERTY SubProject ${subproject}) + set_property(GLOBAL PROPERTY Label ${subproject}) + + ctest_configure(BUILD ${CTEST_BINARY_DIRECTORY} SOURCE ${CTEST_SOURCE_DIRECTORY} ) + ctest_submit(PARTS Configure) + + set(CTEST_BUILD_TARGET "Build${subproject}") + message("Build ${CTEST_BUILD_TARGET}") + ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}" APPEND) + # builds target ${CTEST_BUILD_TARGET} + ctest_submit(PARTS Build) + + ctest_test(BUILD "${CTEST_BINARY_DIRECTORY}" INCLUDE_LABEL "${subproject}" ) + # runs 
only tests that have a LABELS property matching "${subproject}" + + ctest_coverage(BUILD "${CTEST_BINARY_DIRECTORY}" LABELS "${subproject}" ) + + ctest_submit(PARTS Test) + +endforeach() diff --git a/thirdparty/eigen/scripts/check.in b/thirdparty/eigen/scripts/check.in new file mode 100755 index 000000000..7717e2d93 --- /dev/null +++ b/thirdparty/eigen/scripts/check.in @@ -0,0 +1,21 @@ +#!/bin/bash +# check : shorthand for make and ctest -R + +if [[ $# != 1 || $1 == *help ]] +then + echo "usage: $0 regexp" + echo " Builds and runs tests matching the regexp." + echo " The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'." + echo " For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'" + echo " The EIGEN_CTEST_ARGS environment variable allows to pass args to 'ctest'." + echo " For example, with CTest 2.8, you can use EIGEN_CTEST_ARGS='-j5'." + exit 0 +fi + +if [ -n "${EIGEN_CTEST_ARGS:+x}" ] +then + ./buildtests.sh "$1" && ctest -R "$1" ${EIGEN_CTEST_ARGS} +else + ./buildtests.sh "$1" && ctest -R "$1" +fi +exit $? diff --git a/thirdparty/eigen/scripts/debug.in b/thirdparty/eigen/scripts/debug.in new file mode 100755 index 000000000..d339d3d1f --- /dev/null +++ b/thirdparty/eigen/scripts/debug.in @@ -0,0 +1,3 @@ +#!/bin/sh + +cmake -DCMAKE_BUILD_TYPE=Debug . diff --git a/thirdparty/eigen/scripts/eigen_gen_credits.cpp b/thirdparty/eigen/scripts/eigen_gen_credits.cpp new file mode 100644 index 000000000..f2e81631d --- /dev/null +++ b/thirdparty/eigen/scripts/eigen_gen_credits.cpp @@ -0,0 +1,232 @@ +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +// this function takes a line that may contain a name and/or email address, +// and returns just the name, while fixing the "bad cases". 
// Extracts the contributor's display name from one "name and/or e-mail"
// entry and patches up the known bad cases.
//
// line: the entry text, without the trailing churn count.
// Returns the name with trailing spaces removed; the result is empty for
// entries that do not correspond to a person ("convert-repo").
std::string contributor_name(const std::string& line)
{
  // let's first take care of the case of isolated email addresses, like
  // "user@localhost.localdomain" entries
  if(line.find("markb@localhost.localdomain") != std::string::npos)
    return "Mark Borgerding";

  if(line.find("kayhman@contact.intra.cea.fr") != std::string::npos)
    return "Guillaume Saupin";

  // from there on we assume that we have a entry of the form
  // either:
  //   Bla bli Blurp
  // or:
  //   Bla bli Blurp <bla@bli.blurp>
  std::string result;

  const size_t position_of_email_address = line.find_first_of('<');
  if(position_of_email_address != std::string::npos)
  {
    // there is an e-mail address in <...>.

    // Hauke once committed as "John Smith", fix that.
    if(line.find("hauke.heibel") != std::string::npos)
      result = "Hauke Heibel";
    else
      result = line.substr(0, position_of_email_address); // just remove the e-mail address
  }
  else
  {
    // there is no e-mail address in <...>.
    if(line.find("convert-repo") != std::string::npos)
      result = "";
    else
      result = line;
  }

  // remove trailing spaces; find_last_not_of() yields npos (so npos+1 == 0)
  // when the string is empty or made of spaces only, which clears it.
  result.erase(result.find_last_not_of(' ') + 1);

  return result;
}

// parses hg churn output to generate a contributors map.
//
// filename: path of a file holding `hg churn` output, one "<entry> <count>"
//           record per line, optionally followed by a "****" histogram.
// Returns a map from contributor name (see contributor_name()) to the sum of
// the counts of all the entries resolving to that name. A file that cannot be
// opened yields an empty map. Lines that carry no "<entry> <count>" pair
// (blank lines, a single word, a non-numeric count) are skipped.
std::map<std::string,int> contributors_map_from_churn_output(const char *filename)
{
  std::map<std::string,int> contributors_map;

  std::ifstream churn_out(filename);
  std::string line;

  // Test the stream itself rather than only eof(): this also stops on an
  // unopened file and still processes a last line lacking a trailing newline.
  while(std::getline(churn_out, line))
  {
    // remove the histograms "******" that hg churn may draw at the end of some lines
    const size_t first_star = line.find_first_of('*');
    if(first_star != std::string::npos) line.erase(first_star);

    // remove trailing spaces
    line.erase(line.find_last_not_of(' ') + 1);

    // now the last space indicates where the number starts
    const size_t last_space = line.find_last_of(' ');
    // no separator at all: erase(npos) below would throw std::out_of_range
    if(last_space == std::string::npos) continue;

    // get the number (of changesets or of modified lines for each contributor)
    int number = 0;
    std::istringstream number_stream(line.substr(last_space+1));
    if(!(number_stream >> number)) continue;

    // get the name of the contributor
    line.erase(last_space);

    // operator[] inserts a zero count for a new contributor, so this covers
    // both the "new contributor" and the "duplicate" case.
    contributors_map[contributor_name(line)] += number;
  }

  return contributors_map;
}

// find the last name, i.e. the last word.
// for "van den Schbling" types of last names, that's not a problem, that's actually what we want.
+string lastname(const string& name) +{ + size_t last_space = name.find_last_of(' '); + if(last_space >= name.length()-1) return name; + else return name.substr(last_space+1); +} + +struct contributor +{ + string name; + int changedlines; + int changesets; + string url; + string misc; + + contributor() : changedlines(0), changesets(0) {} + + bool operator < (const contributor& other) + { + return lastname(name).compare(lastname(other.name)) < 0; + } +}; + +void add_online_info_into_contributors_list(list& contributors_list, const char *filename) +{ + string line; + ifstream online_info; + online_info.open(filename, ios::in); + while(!getline(online_info,line).eof()) + { + string hgname, realname, url, misc; + + size_t last_bar = line.find_last_of('|'); + if(last_bar == string::npos) continue; + if(last_bar < line.length()) + misc = line.substr(last_bar+1); + line.erase(last_bar); + + last_bar = line.find_last_of('|'); + if(last_bar == string::npos) continue; + if(last_bar < line.length()) + url = line.substr(last_bar+1); + line.erase(last_bar); + + last_bar = line.find_last_of('|'); + if(last_bar == string::npos) continue; + if(last_bar < line.length()) + realname = line.substr(last_bar+1); + line.erase(last_bar); + + hgname = line; + + // remove the example line + if(hgname.find("MercurialName") != string::npos) continue; + + list::iterator it; + for(it=contributors_list.begin(); it != contributors_list.end() && it->name != hgname; ++it) + {} + + if(it == contributors_list.end()) + { + contributor c; + c.name = realname; + c.url = url; + c.misc = misc; + contributors_list.push_back(c); + } + else + { + it->name = realname; + it->url = url; + it->misc = misc; + } + } +} + +int main() +{ + // parse the hg churn output files + map contributors_map_for_changedlines = contributors_map_from_churn_output("churn-changedlines.out"); + //map contributors_map_for_changesets = contributors_map_from_churn_output("churn-changesets.out"); + + // merge into the contributors list 
+ list contributors_list; + map::iterator it; + for(it=contributors_map_for_changedlines.begin(); it != contributors_map_for_changedlines.end(); ++it) + { + contributor c; + c.name = it->first; + c.changedlines = it->second; + c.changesets = 0; //contributors_map_for_changesets.find(it->first)->second; + contributors_list.push_back(c); + } + + add_online_info_into_contributors_list(contributors_list, "online-info.out"); + + contributors_list.sort(); + + cout << "{| cellpadding=\"5\"\n"; + cout << "!\n"; + cout << "! Lines changed\n"; + cout << "!\n"; + + list::iterator itc; + int i = 0; + for(itc=contributors_list.begin(); itc != contributors_list.end(); ++itc) + { + if(itc->name.length() == 0) continue; + if(i%2) cout << "|-\n"; + else cout << "|- style=\"background:#FFFFD0\"\n"; + if(itc->url.length()) + cout << "| [" << itc->url << " " << itc->name << "]\n"; + else + cout << "| " << itc->name << "\n"; + if(itc->changedlines) + cout << "| " << itc->changedlines << "\n"; + else + cout << "| (no information)\n"; + cout << "| " << itc->misc << "\n"; + i++; + } + cout << "|}" << endl; +} diff --git a/thirdparty/eigen/scripts/eigen_gen_docs b/thirdparty/eigen/scripts/eigen_gen_docs new file mode 100644 index 000000000..787dcb325 --- /dev/null +++ b/thirdparty/eigen/scripts/eigen_gen_docs @@ -0,0 +1,24 @@ +#!/bin/sh + +# configuration +# You should call this script with USER set as you want, else some default +# will be used +USER=${USER:-'orzel'} +UPLOAD_DIR=dox-devel + +#ulimit -v 1024000 + +# step 1 : build +rm build/doc/html -Rf +mkdir build -p +(cd build && cmake .. 
&& make doc) || { echo "make failed"; exit 1; } + +#step 2 : upload +# (the '/' at the end of path is very important, see rsync documentation) +rsync -az --no-p --delete build/doc/html/ $USER@ssh.tuxfamily.org:eigen/eigen.tuxfamily.org-web/htdocs/$UPLOAD_DIR/ || { echo "upload failed"; exit 1; } + +#step 3 : fix the perm +ssh $USER@ssh.tuxfamily.org "chmod -R g+w /home/eigen/eigen.tuxfamily.org-web/htdocs/$UPLOAD_DIR" || { echo "perm failed"; exit 1; } + +echo "Uploaded successfully" + diff --git a/thirdparty/eigen/scripts/release.in b/thirdparty/eigen/scripts/release.in new file mode 100755 index 000000000..db2d9d940 --- /dev/null +++ b/thirdparty/eigen/scripts/release.in @@ -0,0 +1,3 @@ +#!/bin/sh + +cmake -DCMAKE_BUILD_TYPE=Release . diff --git a/thirdparty/eigen/scripts/relicense.py b/thirdparty/eigen/scripts/relicense.py new file mode 100644 index 000000000..8a5265f1f --- /dev/null +++ b/thirdparty/eigen/scripts/relicense.py @@ -0,0 +1,69 @@ +# This file is part of Eigen, a lightweight C++ template library +# for linear algebra. +# +# Copyright (C) 2012 Keir Mierle +# +# This Source Code Form is subject to the terms of the Mozilla +# Public License v. 2.0. If a copy of the MPL was not distributed +# with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Author: mierle@gmail.com (Keir Mierle) +# +# Make the long-awaited conversion to MPL. + +lgpl3_header = ''' +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. 
+// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . +''' + +mpl2_header = """ +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +""" + +import os +import sys + +exclusions = set(['relicense.py']) + +def update(text): + if text.find(lgpl3_header) == -1: + return text, False + return text.replace(lgpl3_header, mpl2_header), True + +rootdir = sys.argv[1] +for root, sub_folders, files in os.walk(rootdir): + for basename in files: + if basename in exclusions: + print 'SKIPPED', filename + continue + filename = os.path.join(root, basename) + fo = file(filename) + text = fo.read() + fo.close() + + text, updated = update(text) + if updated: + fo = file(filename, "w") + fo.write(text) + fo.close() + print 'UPDATED', filename + else: + print ' ', filename diff --git a/thirdparty/eigen/signature_of_eigen3_matrix_library b/thirdparty/eigen/signature_of_eigen3_matrix_library new file mode 100644 index 000000000..80aaf4621 --- /dev/null +++ b/thirdparty/eigen/signature_of_eigen3_matrix_library @@ -0,0 +1 @@ +This file is just there as a signature to help identify directories containing Eigen3. When writing a script looking for Eigen3, just look for this file. This is especially useful to help disambiguate with Eigen2... 
diff --git a/thirdparty/eigen/test/CMakeLists.txt b/thirdparty/eigen/test/CMakeLists.txt new file mode 100644 index 000000000..e17985107 --- /dev/null +++ b/thirdparty/eigen/test/CMakeLists.txt @@ -0,0 +1,381 @@ +# generate split test header file only if it does not yet exist +# in order to prevent a rebuild everytime cmake is configured +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "") + foreach(i RANGE 1 999) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h + "#ifdef EIGEN_TEST_PART_${i}\n" + "#define CALL_SUBTEST_${i}(FUNC) CALL_SUBTEST(FUNC)\n" + "#else\n" + "#define CALL_SUBTEST_${i}(FUNC)\n" + "#endif\n\n" + ) + endforeach() +endif() + +# check if we have a Fortran compiler +include("../cmake/language_support.cmake") + +workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS) + +if(EIGEN_Fortran_COMPILER_WORKS) + enable_language(Fortran OPTIONAL) + if(NOT CMAKE_Fortran_COMPILER) + set(EIGEN_Fortran_COMPILER_WORKS OFF) + endif() +endif() + +if(NOT EIGEN_Fortran_COMPILER_WORKS) + # search for a default Lapack library to complete Eigen's one + find_package(LAPACK) +endif() + +# configure blas/lapack (use Eigen's ones) +set(EIGEN_BLAS_LIBRARIES eigen_blas) +set(EIGEN_LAPACK_LIBRARIES eigen_lapack) + +set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse matrices contained in the specified path") +if(EIGEN_TEST_MATRIX_DIR) + if(NOT WIN32) + message(STATUS "Test realworld sparse matrices: ${EIGEN_TEST_MATRIX_DIR}") + add_definitions( -DTEST_REAL_CASES="${EIGEN_TEST_MATRIX_DIR}" ) + else(NOT WIN32) + message(STATUS "REAL CASES CAN NOT BE CURRENTLY TESTED ON WIN32") + endif(NOT WIN32) +endif(EIGEN_TEST_MATRIX_DIR) + +set(SPARSE_LIBS " ") + +find_package(Cholmod) +if(CHOLMOD_FOUND) + add_definitions("-DEIGEN_CHOLMOD_SUPPORT") + include_directories(${CHOLMOD_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} 
${EIGEN_LAPACK_LIBRARIES}) + set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) + ei_add_property(EIGEN_TESTED_BACKENDS "Cholmod, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "Cholmod, ") +endif() + +find_package(Umfpack) +if(UMFPACK_FOUND) + add_definitions("-DEIGEN_UMFPACK_SUPPORT") + include_directories(${UMFPACK_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + ei_add_property(EIGEN_TESTED_BACKENDS "UmfPack, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "UmfPack, ") +endif() + +find_package(SuperLU 4.0) +if(SUPERLU_FOUND) + add_definitions("-DEIGEN_SUPERLU_SUPPORT") + include_directories(${SUPERLU_INCLUDES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ") +endif() + + +find_package(Pastix) +find_package(Scotch) +find_package(Metis 5.0 REQUIRED) +if(PASTIX_FOUND) + add_definitions("-DEIGEN_PASTIX_SUPPORT") + include_directories(${PASTIX_INCLUDES}) + if(SCOTCH_FOUND) + include_directories(${SCOTCH_INCLUDES}) + set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES}) + elseif(METIS_FOUND) + include_directories(${METIS_INCLUDES}) + set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES}) + else(SCOTCH_FOUND) + ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ") + endif(SCOTCH_FOUND) + set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES} ${ORDERING_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + ei_add_property(EIGEN_TESTED_BACKENDS "PaStiX, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ") +endif() + +if(METIS_FOUND) + add_definitions("-DEIGEN_METIS_SUPPORT") + include_directories(${METIS_INCLUDES}) + 
ei_add_property(EIGEN_TESTED_BACKENDS "METIS, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "METIS, ") +endif() + +find_package(SPQR) +if(SPQR_FOUND AND CHOLMOD_FOUND AND (EIGEN_Fortran_COMPILER_WORKS OR LAPACK_FOUND) ) + add_definitions("-DEIGEN_SPQR_SUPPORT") + include_directories(${SPQR_INCLUDES}) + set(SPQR_ALL_LIBS ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${SPQR_ALL_LIBS}) + ei_add_property(EIGEN_TESTED_BACKENDS "SPQR, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "SPQR, ") +endif() + +option(EIGEN_TEST_NOQT "Disable Qt support in unit tests" OFF) +if(NOT EIGEN_TEST_NOQT) + find_package(Qt4) + if(QT4_FOUND) + include(${QT_USE_FILE}) + ei_add_property(EIGEN_TESTED_BACKENDS "Qt4 support, ") + else() + ei_add_property(EIGEN_MISSING_BACKENDS "Qt4 support, ") + endif() +endif(NOT EIGEN_TEST_NOQT) + +if(TEST_LIB) + add_definitions("-DEIGEN_EXTERN_INSTANTIATIONS=1") +endif(TEST_LIB) + +set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Official") +add_custom_target(BuildOfficial) + +ei_add_test(rand) +ei_add_test(meta) +ei_add_test(sizeof) +ei_add_test(dynalloc) +ei_add_test(nomalloc) +ei_add_test(first_aligned) +ei_add_test(nullary) +ei_add_test(mixingtypes) +ei_add_test(packetmath "-DEIGEN_FAST_MATH=1") +ei_add_test(unalignedassert) +ei_add_test(vectorization_logic) +ei_add_test(basicstuff) +ei_add_test(linearstructure) +ei_add_test(integer_types) +ei_add_test(unalignedcount) +if(NOT EIGEN_TEST_NO_EXCEPTIONS) + ei_add_test(exceptions) +endif() +ei_add_test(redux) +ei_add_test(visitor) +ei_add_test(block) +ei_add_test(corners) +ei_add_test(swap) +ei_add_test(resize) +ei_add_test(conservative_resize) +ei_add_test(product_small) +ei_add_test(product_large) +ei_add_test(product_extra) +ei_add_test(diagonalmatrices) +ei_add_test(adjoint) +ei_add_test(diagonal) +ei_add_test(miscmatrices) +ei_add_test(commainitializer) +ei_add_test(smallvectors) 
+ei_add_test(mapped_matrix) +ei_add_test(mapstride) +ei_add_test(mapstaticmethods) +ei_add_test(array) +ei_add_test(array_for_matrix) +ei_add_test(array_replicate) +ei_add_test(array_reverse) +ei_add_test(ref) +ei_add_test(is_same_dense) +ei_add_test(triangular) +ei_add_test(selfadjoint) +ei_add_test(product_selfadjoint) +ei_add_test(product_symm) +ei_add_test(product_syrk) +ei_add_test(product_trmv) +ei_add_test(product_trmm) +ei_add_test(product_trsolve) +ei_add_test(product_mmtr) +ei_add_test(product_notemporary) +ei_add_test(stable_norm) +ei_add_test(permutationmatrices) +ei_add_test(bandmatrix) +ei_add_test(cholesky) +ei_add_test(lu) +ei_add_test(determinant) +ei_add_test(inverse) +ei_add_test(qr) +ei_add_test(qr_colpivoting) +ei_add_test(qr_fullpivoting) +ei_add_test(upperbidiagonalization) +ei_add_test(hessenberg) +ei_add_test(schur_real) +ei_add_test(schur_complex) +ei_add_test(eigensolver_selfadjoint) +ei_add_test(eigensolver_generic) +ei_add_test(eigensolver_complex) +ei_add_test(real_qz) +ei_add_test(eigensolver_generalized_real) +ei_add_test(jacobi) +ei_add_test(jacobisvd) +ei_add_test(bdcsvd) +ei_add_test(householder) +ei_add_test(geo_orthomethods) +ei_add_test(geo_quaternion) +ei_add_test(geo_eulerangles) +ei_add_test(geo_parametrizedline) +ei_add_test(geo_alignedbox) +ei_add_test(geo_hyperplane) +ei_add_test(geo_transformations) +ei_add_test(geo_homogeneous) +ei_add_test(stdvector) +ei_add_test(stdvector_overload) +ei_add_test(stdlist) +ei_add_test(stdlist_overload) +ei_add_test(stddeque) +ei_add_test(stddeque_overload) +ei_add_test(sparse_basic) +ei_add_test(sparse_block) +ei_add_test(sparse_vector) +ei_add_test(sparse_product) +ei_add_test(sparse_ref) +ei_add_test(sparse_solvers) +ei_add_test(sparse_permutations) +ei_add_test(simplicial_cholesky) +ei_add_test(conjugate_gradient) +ei_add_test(incomplete_cholesky) +ei_add_test(bicgstab) +ei_add_test(lscg) +ei_add_test(sparselu) +ei_add_test(sparseqr) +ei_add_test(umeyama) +ei_add_test(nesting_ops 
"${CMAKE_CXX_FLAGS_DEBUG}") +ei_add_test(zerosized) +ei_add_test(dontalign) +ei_add_test(evaluators) +if(NOT EIGEN_TEST_NO_EXCEPTIONS) + ei_add_test(sizeoverflow) +endif() +ei_add_test(prec_inverse_4x4) +ei_add_test(vectorwiseop) +ei_add_test(special_numbers) +ei_add_test(rvalue_types) +ei_add_test(dense_storage) +ei_add_test(ctorleak) +ei_add_test(mpl2only) +ei_add_test(inplace_decomposition) +ei_add_test(half_float) +ei_add_test(array_of_string) + +add_executable(bug1213 bug1213.cpp bug1213_main.cpp) + +check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH) +if(COMPILER_SUPPORT_FASTMATH) + set(EIGEN_FASTMATH_FLAGS "-ffast-math") +else() + check_cxx_compiler_flag("/fp:fast" COMPILER_SUPPORT_FPFAST) + if(COMPILER_SUPPORT_FPFAST) + set(EIGEN_FASTMATH_FLAGS "/fp:fast") + endif() +endif() + +ei_add_test(fastmath " ${EIGEN_FASTMATH_FLAGS} ") + +# # ei_add_test(denseLM) + +if(QT4_FOUND) + ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}") +endif(QT4_FOUND) + +if(UMFPACK_FOUND) + ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") +endif() + +if(SUPERLU_FOUND) + ei_add_test(superlu_support "" "${SUPERLU_ALL_LIBS}") +endif() + +if(CHOLMOD_FOUND) + ei_add_test(cholmod_support "" "${CHOLMOD_ALL_LIBS}") +endif() + +if(PARDISO_FOUND) + ei_add_test(pardiso_support "" "${PARDISO_ALL_LIBS}") +endif() + +if(PASTIX_FOUND AND (SCOTCH_FOUND OR METIS_FOUND)) + ei_add_test(pastix_support "" "${PASTIX_ALL_LIBS}") +endif() + +if(SPQR_FOUND AND CHOLMOD_FOUND) + ei_add_test(spqr_support "" "${SPQR_ALL_LIBS}") +endif() + +if(METIS_FOUND) +ei_add_test(metis_support "" "${METIS_LIBRARIES}") +endif() + +string(TOLOWER "${CMAKE_CXX_COMPILER}" cmake_cxx_compiler_tolower) +if(cmake_cxx_compiler_tolower MATCHES "qcc") + set(CXX_IS_QCC "ON") +endif() + +ei_add_property(EIGEN_TESTING_SUMMARY "CXX: ${CMAKE_CXX_COMPILER}\n") +if(CMAKE_COMPILER_IS_GNUCXX AND NOT CXX_IS_QCC) + execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version COMMAND head -n 1 OUTPUT_VARIABLE EIGEN_CXX_VERSION_STRING 
OUTPUT_STRIP_TRAILING_WHITESPACE) + ei_add_property(EIGEN_TESTING_SUMMARY "CXX_VERSION: ${EIGEN_CXX_VERSION_STRING}\n") +endif() +ei_add_property(EIGEN_TESTING_SUMMARY "CXX_FLAGS: ${CMAKE_CXX_FLAGS}\n") +ei_add_property(EIGEN_TESTING_SUMMARY "Sparse lib flags: ${SPARSE_LIBS}\n") + +option(EIGEN_TEST_EIGEN2 "Run whole Eigen2 test suite against EIGEN2_SUPPORT" OFF) +mark_as_advanced(EIGEN_TEST_EIGEN2) +if(EIGEN_TEST_EIGEN2) + message(WARNING "The Eigen2 test suite has been removed") +endif() + +# boost MP unit test +find_package(Boost) +if(Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + ei_add_test(boostmultiprec "" "${Boost_LIBRARIES}") + ei_add_property(EIGEN_TESTED_BACKENDS "Boost.Multiprecision, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "Boost.Multiprecision, ") +endif() + + +# CUDA unit tests +option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF) +option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF) + +if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang") + message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.") +endif() + +if(EIGEN_TEST_CUDA) + +find_package(CUDA 5.0) +if(CUDA_FOUND) + + set(CUDA_PROPAGATE_HOST_FLAGS OFF) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) + endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") + endif() + cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR}) + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") + + ei_add_test(cuda_basic) + + unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) + +endif(CUDA_FOUND) + +endif(EIGEN_TEST_CUDA) + + +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests) +add_test(NAME failtests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests COMMAND ${CMAKE_COMMAND} ${Eigen_SOURCE_DIR} -G "${CMAKE_GENERATOR}" -DEIGEN_FAILTEST=ON) + 
+option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF) +IF(EIGEN_TEST_BUILD_DOCUMENTATION) + add_dependencies(buildtests doc) +ENDIF() diff --git a/thirdparty/eigen/test/adjoint.cpp b/thirdparty/eigen/test/adjoint.cpp new file mode 100644 index 000000000..bdea51c10 --- /dev/null +++ b/thirdparty/eigen/test/adjoint.cpp @@ -0,0 +1,200 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_NO_STATIC_ASSERT + +#include "main.h" + +template struct adjoint_specific; + +template<> struct adjoint_specific { + template + static void run(const Vec& v1, const Vec& v2, Vec& v3, const Mat& square, Scalar s1, Scalar s2) { + VERIFY(test_isApproxWithRef((s1 * v1 + s2 * v2).dot(v3), numext::conj(s1) * v1.dot(v3) + numext::conj(s2) * v2.dot(v3), 0)); + VERIFY(test_isApproxWithRef(v3.dot(s1 * v1 + s2 * v2), s1*v3.dot(v1)+s2*v3.dot(v2), 0)); + + // check compatibility of dot and adjoint + VERIFY(test_isApproxWithRef(v1.dot(square * v2), (square.adjoint() * v1).dot(v2), 0)); + } +}; + +template<> struct adjoint_specific { + template + static void run(const Vec& v1, const Vec& v2, Vec& v3, const Mat& square, Scalar s1, Scalar s2) { + typedef typename NumTraits::Real RealScalar; + using std::abs; + + RealScalar ref = NumTraits::IsInteger ? 
RealScalar(0) : (std::max)((s1 * v1 + s2 * v2).norm(),v3.norm()); + VERIFY(test_isApproxWithRef((s1 * v1 + s2 * v2).dot(v3), numext::conj(s1) * v1.dot(v3) + numext::conj(s2) * v2.dot(v3), ref)); + VERIFY(test_isApproxWithRef(v3.dot(s1 * v1 + s2 * v2), s1*v3.dot(v1)+s2*v3.dot(v2), ref)); + + VERIFY_IS_APPROX(v1.squaredNorm(), v1.norm() * v1.norm()); + // check normalized() and normalize() + VERIFY_IS_APPROX(v1, v1.norm() * v1.normalized()); + v3 = v1; + v3.normalize(); + VERIFY_IS_APPROX(v1, v1.norm() * v3); + VERIFY_IS_APPROX(v3, v1.normalized()); + VERIFY_IS_APPROX(v3.norm(), RealScalar(1)); + + // check null inputs + VERIFY_IS_APPROX((v1*0).normalized(), (v1*0)); +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE) + RealScalar very_small = (std::numeric_limits::min)(); + VERIFY( (v1*very_small).norm() == 0 ); + VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small)); + v3 = v1*very_small; + v3.normalize(); + VERIFY_IS_APPROX(v3, (v1*very_small)); +#endif + + // check compatibility of dot and adjoint + ref = NumTraits::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm())); + VERIFY(internal::isMuchSmallerThan(abs(v1.dot(square * v2) - (square.adjoint() * v1).dot(v2)), ref, test_precision())); + + // check that Random().normalized() works: tricky as the random xpr must be evaluated by + // normalized() in order to produce a consistent result. 
+ VERIFY_IS_APPROX(Vec::Random(v1.size()).normalized().norm(), RealScalar(1)); + } +}; + +template void adjoint(const MatrixType& m) +{ + /* this test covers the following files: + Transpose.h Conjugate.h Dot.h + */ + using std::abs; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + typedef Matrix SquareMatrixType; + const Index PacketSize = internal::packet_traits::size; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + square = SquareMatrixType::Random(rows, rows); + VectorType v1 = VectorType::Random(rows), + v2 = VectorType::Random(rows), + v3 = VectorType::Random(rows), + vzero = VectorType::Zero(rows); + + Scalar s1 = internal::random(), + s2 = internal::random(); + + // check basic compatibility of adjoint, transpose, conjugate + VERIFY_IS_APPROX(m1.transpose().conjugate().adjoint(), m1); + VERIFY_IS_APPROX(m1.adjoint().conjugate().transpose(), m1); + + // check multiplicative behavior + VERIFY_IS_APPROX((m1.adjoint() * m2).adjoint(), m2.adjoint() * m1); + VERIFY_IS_APPROX((s1 * m1).adjoint(), numext::conj(s1) * m1.adjoint()); + + // check basic properties of dot, squaredNorm + VERIFY_IS_APPROX(numext::conj(v1.dot(v2)), v2.dot(v1)); + VERIFY_IS_APPROX(numext::real(v1.dot(v1)), v1.squaredNorm()); + + adjoint_specific::IsInteger>::run(v1, v2, v3, square, s1, s2); + + VERIFY_IS_MUCH_SMALLER_THAN(abs(vzero.dot(v1)), static_cast(1)); + + // like in testBasicStuff, test operator() to check const-qualification + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + VERIFY_IS_APPROX(m1.conjugate()(r,c), numext::conj(m1(r,c))); + VERIFY_IS_APPROX(m1.adjoint()(c,r), numext::conj(m1(r,c))); + + // check inplace transpose + m3 = m1; + m3.transposeInPlace(); + VERIFY_IS_APPROX(m3,m1.transpose()); + m3.transposeInPlace(); + 
VERIFY_IS_APPROX(m3,m1); + + if(PacketSize(0,m3.rows()-PacketSize); + Index j = internal::random(0,m3.cols()-PacketSize); + m3.template block(i,j).transposeInPlace(); + VERIFY_IS_APPROX( (m3.template block(i,j)), (m1.template block(i,j).transpose()) ); + m3.template block(i,j).transposeInPlace(); + VERIFY_IS_APPROX(m3,m1); + } + + // check inplace adjoint + m3 = m1; + m3.adjointInPlace(); + VERIFY_IS_APPROX(m3,m1.adjoint()); + m3.transposeInPlace(); + VERIFY_IS_APPROX(m3,m1.conjugate()); + + // check mixed dot product + typedef Matrix RealVectorType; + RealVectorType rv1 = RealVectorType::Random(rows); + VERIFY_IS_APPROX(v1.dot(rv1.template cast()), v1.dot(rv1)); + VERIFY_IS_APPROX(rv1.template cast().dot(v1), rv1.dot(v1)); +} + +void test_adjoint() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( adjoint(Matrix()) ); + CALL_SUBTEST_2( adjoint(Matrix3d()) ); + CALL_SUBTEST_3( adjoint(Matrix4f()) ); + + CALL_SUBTEST_4( adjoint(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_5( adjoint(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( adjoint(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + + // Complement for 128 bits vectorization: + CALL_SUBTEST_8( adjoint(Matrix2d()) ); + CALL_SUBTEST_9( adjoint(Matrix()) ); + + // 256 bits vectorization: + CALL_SUBTEST_10( adjoint(Matrix()) ); + CALL_SUBTEST_11( adjoint(Matrix()) ); + CALL_SUBTEST_12( adjoint(Matrix()) ); + } + // test a large static matrix only once + CALL_SUBTEST_7( adjoint(Matrix()) ); + +#ifdef EIGEN_TEST_PART_13 + { + MatrixXcf a(10,10), b(10,10); + VERIFY_RAISES_ASSERT(a = a.transpose()); + VERIFY_RAISES_ASSERT(a = a.transpose() + b); + VERIFY_RAISES_ASSERT(a = b + a.transpose()); + VERIFY_RAISES_ASSERT(a = a.conjugate().transpose()); + VERIFY_RAISES_ASSERT(a = a.adjoint()); + VERIFY_RAISES_ASSERT(a = a.adjoint() + b); + 
VERIFY_RAISES_ASSERT(a = b + a.adjoint()); + + // no assertion should be triggered for these cases: + a.transpose() = a.transpose(); + a.transpose() += a.transpose(); + a.transpose() += a.transpose() + b; + a.transpose() = a.adjoint(); + a.transpose() += a.adjoint(); + a.transpose() += a.adjoint() + b; + + // regression tests for check_for_aliasing + MatrixXd c(10,10); + c = 1.0 * MatrixXd::Ones(10,10) + c; + c = MatrixXd::Ones(10,10) * 1.0 + c; + c = c + MatrixXd::Ones(10,10) .cwiseProduct( MatrixXd::Zero(10,10) ); + c = MatrixXd::Ones(10,10) * MatrixXd::Zero(10,10); + } +#endif +} + diff --git a/thirdparty/eigen/test/array.cpp b/thirdparty/eigen/test/array.cpp new file mode 100644 index 000000000..15c3266a9 --- /dev/null +++ b/thirdparty/eigen/test/array.cpp @@ -0,0 +1,495 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void array(const ArrayType& m) +{ + typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + typedef typename ArrayType::RealScalar RealScalar; + typedef Array ColVectorType; + typedef Array RowVectorType; + + Index rows = m.rows(); + Index cols = m.cols(); + + ArrayType m1 = ArrayType::Random(rows, cols), + m2 = ArrayType::Random(rows, cols), + m3(rows, cols); + ArrayType m4 = m1; // copy constructor + VERIFY_IS_APPROX(m1, m4); + + ColVectorType cv1 = ColVectorType::Random(rows); + RowVectorType rv1 = RowVectorType::Random(cols); + + Scalar s1 = internal::random(), + s2 = internal::random(); + + // scalar addition + VERIFY_IS_APPROX(m1 + s1, s1 + m1); + VERIFY_IS_APPROX(m1 + s1, ArrayType::Constant(rows,cols,s1) + m1); + VERIFY_IS_APPROX(s1 - m1, (-m1)+s1 ); + VERIFY_IS_APPROX(m1 - s1, m1 - ArrayType::Constant(rows,cols,s1)); + VERIFY_IS_APPROX(s1 - m1, ArrayType::Constant(rows,cols,s1) - m1); + VERIFY_IS_APPROX((m1*Scalar(2)) - s2, (m1+m1) - ArrayType::Constant(rows,cols,s2) ); + m3 = m1; + m3 += s2; + VERIFY_IS_APPROX(m3, m1 + s2); + m3 = m1; + m3 -= s1; + VERIFY_IS_APPROX(m3, m1 - s1); + + // scalar operators via Maps + m3 = m1; + ArrayType::Map(m1.data(), m1.rows(), m1.cols()) -= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m1, m3 - m2); + + m3 = m1; + ArrayType::Map(m1.data(), m1.rows(), m1.cols()) += ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m1, m3 + m2); + + m3 = m1; + ArrayType::Map(m1.data(), m1.rows(), m1.cols()) *= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m1, m3 * m2); + + m3 = m1; + m2 = ArrayType::Random(rows,cols); + m2 = (m2==0).select(1,m2); + ArrayType::Map(m1.data(), m1.rows(), m1.cols()) /= ArrayType::Map(m2.data(), m2.rows(), m2.cols()); + VERIFY_IS_APPROX(m1, m3 / m2); + + // reductions + VERIFY_IS_APPROX(m1.abs().colwise().sum().sum(), m1.abs().sum()); + VERIFY_IS_APPROX(m1.abs().rowwise().sum().sum(), 
m1.abs().sum()); + using std::abs; + VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.colwise().sum().sum() - m1.sum()), m1.abs().sum()); + VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum()); + if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision())) + VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + + // vector-wise ops + m3 = m1; + VERIFY_IS_APPROX(m3.colwise() += cv1, m1.colwise() + cv1); + m3 = m1; + VERIFY_IS_APPROX(m3.colwise() -= cv1, m1.colwise() - cv1); + m3 = m1; + VERIFY_IS_APPROX(m3.rowwise() += rv1, m1.rowwise() + rv1); + m3 = m1; + VERIFY_IS_APPROX(m3.rowwise() -= rv1, m1.rowwise() - rv1); + + // Conversion from scalar + VERIFY_IS_APPROX((m3 = s1), ArrayType::Constant(rows,cols,s1)); + VERIFY_IS_APPROX((m3 = 1), ArrayType::Constant(rows,cols,1)); + VERIFY_IS_APPROX((m3.topLeftCorner(rows,cols) = 1), ArrayType::Constant(rows,cols,1)); + typedef Array FixedArrayType; + FixedArrayType f1(s1); + VERIFY_IS_APPROX(f1, FixedArrayType::Constant(s1)); + FixedArrayType f2(numext::real(s1)); + VERIFY_IS_APPROX(f2, FixedArrayType::Constant(numext::real(s1))); + FixedArrayType f3((int)100*numext::real(s1)); + VERIFY_IS_APPROX(f3, FixedArrayType::Constant((int)100*numext::real(s1))); + f1.setRandom(); + FixedArrayType f4(f1.data()); + VERIFY_IS_APPROX(f4, f1); + + // pow + VERIFY_IS_APPROX(m1.pow(2), m1.square()); + VERIFY_IS_APPROX(pow(m1,2), m1.square()); + VERIFY_IS_APPROX(m1.pow(3), m1.cube()); + VERIFY_IS_APPROX(pow(m1,3), m1.cube()); + VERIFY_IS_APPROX((-m1).pow(3), -m1.cube()); + VERIFY_IS_APPROX(pow(2*m1,3), 8*m1.cube()); + ArrayType exponents = ArrayType::Constant(rows, cols, RealScalar(2)); + VERIFY_IS_APPROX(Eigen::pow(m1,exponents), m1.square()); + VERIFY_IS_APPROX(m1.pow(exponents), m1.square()); + VERIFY_IS_APPROX(Eigen::pow(2*m1,exponents), 4*m1.square()); + 
VERIFY_IS_APPROX((2*m1).pow(exponents), 4*m1.square()); + VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square()); + VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square()); + VERIFY_IS_APPROX(Eigen::pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0))); + + // Check possible conflicts with 1D ctor + typedef Array OneDArrayType; + OneDArrayType o1(rows); + VERIFY(o1.size()==rows); + OneDArrayType o4((int)rows); + VERIFY(o4.size()==rows); +} + +template void comparisons(const ArrayType& m) +{ + using std::abs; + typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + ArrayType m1 = ArrayType::Random(rows, cols), + m2 = ArrayType::Random(rows, cols), + m3(rows, cols), + m4 = m1; + + m4 = (m4.abs()==Scalar(0)).select(1,m4); + + VERIFY(((m1 + Scalar(1)) > m1).all()); + VERIFY(((m1 - Scalar(1)) < m1).all()); + if (rows*cols>1) + { + m3 = m1; + m3(r,c) += 1; + VERIFY(! (m1 < m3).all() ); + VERIFY(! 
(m1 > m3).all() ); + } + VERIFY(!(m1 > m2 && m1 < m2).any()); + VERIFY((m1 <= m2 || m1 >= m2).all()); + + // comparisons array to scalar + VERIFY( (m1 != (m1(r,c)+1) ).any() ); + VERIFY( (m1 > (m1(r,c)-1) ).any() ); + VERIFY( (m1 < (m1(r,c)+1) ).any() ); + VERIFY( (m1 == m1(r,c) ).any() ); + + // comparisons scalar to array + VERIFY( ( (m1(r,c)+1) != m1).any() ); + VERIFY( ( (m1(r,c)-1) < m1).any() ); + VERIFY( ( (m1(r,c)+1) > m1).any() ); + VERIFY( ( m1(r,c) == m1).any() ); + + // test Select + VERIFY_IS_APPROX( (m1m2).select(m1,m2), m1.cwiseMax(m2) ); + Scalar mid = (m1.cwiseAbs().minCoeff() + m1.cwiseAbs().maxCoeff())/Scalar(2); + for (int j=0; j=ArrayType::Constant(rows,cols,mid)) + .select(m1,0), m3); + // even shorter version: + VERIFY_IS_APPROX( (m1.abs()RealScalar(0.1)).count() == rows*cols); + + // and/or + VERIFY( (m1RealScalar(0)).count() == 0); + VERIFY( (m1=RealScalar(0)).count() == rows*cols); + RealScalar a = m1.abs().mean(); + VERIFY( (m1<-a || m1>a).count() == (m1.abs()>a).count()); + + typedef Array ArrayOfIndices; + + // TODO allows colwise/rowwise for array + VERIFY_IS_APPROX(((m1.abs()+1)>RealScalar(0.1)).colwise().count(), ArrayOfIndices::Constant(cols,rows).transpose()); + VERIFY_IS_APPROX(((m1.abs()+1)>RealScalar(0.1)).rowwise().count(), ArrayOfIndices::Constant(rows, cols)); +} + +template void array_real(const ArrayType& m) +{ + using std::abs; + using std::sqrt; + typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + ArrayType m1 = ArrayType::Random(rows, cols), + m2 = ArrayType::Random(rows, cols), + m3(rows, cols), + m4 = m1; + + m4 = (m4.abs()==Scalar(0)).select(1,m4); + + Scalar s1 = internal::random(); + + // these tests are mostly to check possible compilation issues with free-functions. 
+ VERIFY_IS_APPROX(m1.sin(), sin(m1)); + VERIFY_IS_APPROX(m1.cos(), cos(m1)); + VERIFY_IS_APPROX(m1.tan(), tan(m1)); + VERIFY_IS_APPROX(m1.asin(), asin(m1)); + VERIFY_IS_APPROX(m1.acos(), acos(m1)); + VERIFY_IS_APPROX(m1.atan(), atan(m1)); + VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); + VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); + VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + + VERIFY_IS_APPROX(m1.arg(), arg(m1)); + VERIFY_IS_APPROX(m1.round(), round(m1)); + VERIFY_IS_APPROX(m1.floor(), floor(m1)); + VERIFY_IS_APPROX(m1.ceil(), ceil(m1)); + VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); + VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); + VERIFY((m1.isFinite() == (Eigen::isfinite)(m1)).all()); + VERIFY_IS_APPROX(m1.inverse(), inverse(m1)); + VERIFY_IS_APPROX(m1.abs(), abs(m1)); + VERIFY_IS_APPROX(m1.abs2(), abs2(m1)); + VERIFY_IS_APPROX(m1.square(), square(m1)); + VERIFY_IS_APPROX(m1.cube(), cube(m1)); + VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); + + + // avoid NaNs with abs() so verification doesn't fail + m3 = m1.abs(); + VERIFY_IS_APPROX(m3.sqrt(), sqrt(abs(m1))); + VERIFY_IS_APPROX(m3.rsqrt(), Scalar(1)/sqrt(abs(m1))); + VERIFY_IS_APPROX(rsqrt(m3), Scalar(1)/sqrt(abs(m1))); + VERIFY_IS_APPROX(m3.log(), log(m3)); + VERIFY_IS_APPROX(m3.log1p(), log1p(m3)); + VERIFY_IS_APPROX(m3.log10(), log10(m3)); + + + VERIFY((!(m1>m2) == (m1<=m2)).all()); + + VERIFY_IS_APPROX(sin(m1.asin()), m1); + VERIFY_IS_APPROX(cos(m1.acos()), m1); + VERIFY_IS_APPROX(tan(m1.atan()), m1); + VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); + VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); + VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); + VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast())*std::acos(-1.0)); + VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all()); + VERIFY((Eigen::isnan)((m1*0.0)/0.0).all()); + VERIFY((Eigen::isinf)(m4/0.0).all()); + VERIFY(((Eigen::isfinite)(m1) 
&& (!(Eigen::isfinite)(m1*0.0/0.0)) && (!(Eigen::isfinite)(m4/0.0))).all()); + VERIFY_IS_APPROX(inverse(inverse(m1)),m1); + VERIFY((abs(m1) == m1 || abs(m1) == -m1).all()); + VERIFY_IS_APPROX(m3, sqrt(abs2(m1))); + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1*m1.sign(),m1.abs()); + VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1); + + VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1)); + VERIFY_IS_APPROX(numext::abs2(real(m1)) + numext::abs2(imag(m1)), numext::abs2(m1)); + if(!NumTraits::IsComplex) + VERIFY_IS_APPROX(numext::real(m1), m1); + + // shift argument of logarithm so that it is not zero + Scalar smallNumber = NumTraits::dummy_precision(); + VERIFY_IS_APPROX((m3 + smallNumber).log() , log(abs(m1) + smallNumber)); + VERIFY_IS_APPROX((m3 + smallNumber + 1).log() , log1p(abs(m1) + smallNumber)); + + VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); + VERIFY_IS_APPROX(m1.exp(), exp(m1)); + VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); + + VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); + VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); + + VERIFY_IS_APPROX(m3.pow(RealScalar(-0.5)), m3.rsqrt()); + VERIFY_IS_APPROX(pow(m3,RealScalar(-0.5)), m3.rsqrt()); + + VERIFY_IS_APPROX(log10(m3), log(m3)/log(10)); + + // scalar by array division + const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); + s1 += Scalar(tiny); + m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); + VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); + + // check inplace transpose + m3 = m1; + m3.transposeInPlace(); + VERIFY_IS_APPROX(m3, m1.transpose()); + m3.transposeInPlace(); + VERIFY_IS_APPROX(m3, m1); +} + +template void array_complex(const ArrayType& m) +{ + typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + ArrayType m1 = ArrayType::Random(rows, cols), + m2(rows, cols), 
+ m4 = m1; + + m4.real() = (m4.real().abs()==RealScalar(0)).select(RealScalar(1),m4.real()); + m4.imag() = (m4.imag().abs()==RealScalar(0)).select(RealScalar(1),m4.imag()); + + Array m3(rows, cols); + + for (Index i = 0; i < m.rows(); ++i) + for (Index j = 0; j < m.cols(); ++j) + m2(i,j) = sqrt(m1(i,j)); + + // these tests are mostly to check possible compilation issues with free-functions. + VERIFY_IS_APPROX(m1.sin(), sin(m1)); + VERIFY_IS_APPROX(m1.cos(), cos(m1)); + VERIFY_IS_APPROX(m1.tan(), tan(m1)); + VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); + VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); + VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + VERIFY_IS_APPROX(m1.arg(), arg(m1)); + VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); + VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); + VERIFY((m1.isFinite() == (Eigen::isfinite)(m1)).all()); + VERIFY_IS_APPROX(m1.inverse(), inverse(m1)); + VERIFY_IS_APPROX(m1.log(), log(m1)); + VERIFY_IS_APPROX(m1.log10(), log10(m1)); + VERIFY_IS_APPROX(m1.abs(), abs(m1)); + VERIFY_IS_APPROX(m1.abs2(), abs2(m1)); + VERIFY_IS_APPROX(m1.sqrt(), sqrt(m1)); + VERIFY_IS_APPROX(m1.square(), square(m1)); + VERIFY_IS_APPROX(m1.cube(), cube(m1)); + VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); + + + VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); + VERIFY_IS_APPROX(m1.exp(), exp(m1)); + VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); + + VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); + VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); + VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); + + for (Index i = 0; i < m.rows(); ++i) + for (Index j = 0; j < m.cols(); ++j) + m3(i,j) = std::atan2(imag(m1(i,j)), real(m1(i,j))); + VERIFY_IS_APPROX(arg(m1), m3); + + std::complex zero(0.0,0.0); + VERIFY((Eigen::isnan)(m1*zero/zero).all()); +#if EIGEN_COMP_MSVC + // msvc complex division is not robust + VERIFY((Eigen::isinf)(m4/RealScalar(0)).all()); +#else 
+#if EIGEN_COMP_CLANG + // clang's complex division is notoriously broken too + if((numext::isinf)(m4(0,0)/RealScalar(0))) { +#endif + VERIFY((Eigen::isinf)(m4/zero).all()); +#if EIGEN_COMP_CLANG + } + else + { + VERIFY((Eigen::isinf)(m4.real()/zero.real()).all()); + } +#endif +#endif // MSVC + + VERIFY(((Eigen::isfinite)(m1) && (!(Eigen::isfinite)(m1*zero/zero)) && (!(Eigen::isfinite)(m1/zero))).all()); + + VERIFY_IS_APPROX(inverse(inverse(m1)),m1); + VERIFY_IS_APPROX(conj(m1.conjugate()), m1); + VERIFY_IS_APPROX(abs(m1), sqrt(square(real(m1))+square(imag(m1)))); + VERIFY_IS_APPROX(abs(m1), sqrt(abs2(m1))); + VERIFY_IS_APPROX(log10(m1), log(m1)/log(10)); + + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1); + + // scalar by array division + Scalar s1 = internal::random(); + const RealScalar tiny = std::sqrt(std::numeric_limits::epsilon()); + s1 += Scalar(tiny); + m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); + VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); + + // check inplace transpose + m2 = m1; + m2.transposeInPlace(); + VERIFY_IS_APPROX(m2, m1.transpose()); + m2.transposeInPlace(); + VERIFY_IS_APPROX(m2, m1); + +} + +template void min_max(const ArrayType& m) +{ + typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + ArrayType m1 = ArrayType::Random(rows, cols); + + // min/max with array + Scalar maxM1 = m1.maxCoeff(); + Scalar minM1 = m1.minCoeff(); + + VERIFY_IS_APPROX(ArrayType::Constant(rows,cols, minM1), (m1.min)(ArrayType::Constant(rows,cols, minM1))); + VERIFY_IS_APPROX(m1, (m1.min)(ArrayType::Constant(rows,cols, maxM1))); + + VERIFY_IS_APPROX(ArrayType::Constant(rows,cols, maxM1), (m1.max)(ArrayType::Constant(rows,cols, maxM1))); + VERIFY_IS_APPROX(m1, (m1.max)(ArrayType::Constant(rows,cols, minM1))); + + // min/max with scalar input + VERIFY_IS_APPROX(ArrayType::Constant(rows,cols, minM1), (m1.min)( minM1)); + 
VERIFY_IS_APPROX(m1, (m1.min)( maxM1)); + + VERIFY_IS_APPROX(ArrayType::Constant(rows,cols, maxM1), (m1.max)( maxM1)); + VERIFY_IS_APPROX(m1, (m1.max)( minM1)); + +} + +void test_array() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( array(Array()) ); + CALL_SUBTEST_2( array(Array22f()) ); + CALL_SUBTEST_3( array(Array44d()) ); + CALL_SUBTEST_4( array(ArrayXXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_5( array(ArrayXXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( array(ArrayXXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( comparisons(Array()) ); + CALL_SUBTEST_2( comparisons(Array22f()) ); + CALL_SUBTEST_3( comparisons(Array44d()) ); + CALL_SUBTEST_5( comparisons(ArrayXXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( comparisons(ArrayXXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( min_max(Array()) ); + CALL_SUBTEST_2( min_max(Array22f()) ); + CALL_SUBTEST_3( min_max(Array44d()) ); + CALL_SUBTEST_5( min_max(ArrayXXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( min_max(ArrayXXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( array_real(Array()) ); + CALL_SUBTEST_2( array_real(Array22f()) ); + CALL_SUBTEST_3( array_real(Array44d()) ); + CALL_SUBTEST_5( array_real(ArrayXXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_4( array_complex(ArrayXXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + 
+ VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); + VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, float >::value)); + VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); + typedef CwiseUnaryOp, ArrayXd > Xpr; + VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, + ArrayBase + >::value)); +} diff --git a/thirdparty/eigen/test/array_for_matrix.cpp b/thirdparty/eigen/test/array_for_matrix.cpp new file mode 100644 index 000000000..c1501947b --- /dev/null +++ b/thirdparty/eigen/test/array_for_matrix.cpp @@ -0,0 +1,284 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void array_for_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix ColVectorType; + typedef Matrix RowVectorType; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols); + + ColVectorType cv1 = ColVectorType::Random(rows); + RowVectorType rv1 = RowVectorType::Random(cols); + + Scalar s1 = internal::random(), + s2 = internal::random(); + + // scalar addition + VERIFY_IS_APPROX(m1.array() + s1, s1 + m1.array()); + VERIFY_IS_APPROX((m1.array() + s1).matrix(), MatrixType::Constant(rows,cols,s1) + m1); + VERIFY_IS_APPROX(((m1*Scalar(2)).array() - s2).matrix(), (m1+m1) - MatrixType::Constant(rows,cols,s2) ); + m3 = m1; + m3.array() += s2; + VERIFY_IS_APPROX(m3, (m1.array() + s2).matrix()); + m3 = m1; + m3.array() -= s1; + VERIFY_IS_APPROX(m3, (m1.array() - s1).matrix()); + + // reductions + VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum().sum() - m1.sum(), m1.squaredNorm()); + VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm()); + VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm()); + VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm()); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + + // vector-wise ops + m3 = m1; + VERIFY_IS_APPROX(m3.colwise() += cv1, m1.colwise() + cv1); + m3 = m1; + VERIFY_IS_APPROX(m3.colwise() -= cv1, m1.colwise() - cv1); + m3 = m1; + VERIFY_IS_APPROX(m3.rowwise() += rv1, m1.rowwise() + rv1); + m3 = m1; + VERIFY_IS_APPROX(m3.rowwise() -= rv1, m1.rowwise() - rv1); + + // empty objects + VERIFY_IS_APPROX(m1.block(0,0,0,cols).colwise().sum(), RowVectorType::Zero(cols)); + VERIFY_IS_APPROX(m1.block(0,0,rows,0).rowwise().prod(), 
ColVectorType::Ones(rows)); + + // verify the const accessors exist + const Scalar& ref_m1 = m.matrix().array().coeffRef(0); + const Scalar& ref_m2 = m.matrix().array().coeffRef(0,0); + const Scalar& ref_a1 = m.array().matrix().coeffRef(0); + const Scalar& ref_a2 = m.array().matrix().coeffRef(0,0); + VERIFY(&ref_a1 == &ref_m1); + VERIFY(&ref_a2 == &ref_m2); + + // Check write accessors: + m1.array().coeffRef(0,0) = 1; + VERIFY_IS_APPROX(m1(0,0),Scalar(1)); + m1.array()(0,0) = 2; + VERIFY_IS_APPROX(m1(0,0),Scalar(2)); + m1.array().matrix().coeffRef(0,0) = 3; + VERIFY_IS_APPROX(m1(0,0),Scalar(3)); + m1.array().matrix()(0,0) = 4; + VERIFY_IS_APPROX(m1(0,0),Scalar(4)); +} + +template void comparisons(const MatrixType& m) +{ + using std::abs; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols); + + VERIFY(((m1.array() + Scalar(1)) > m1.array()).all()); + VERIFY(((m1.array() - Scalar(1)) < m1.array()).all()); + if (rows*cols>1) + { + m3 = m1; + m3(r,c) += 1; + VERIFY(! (m1.array() < m3.array()).all() ); + VERIFY(! 
(m1.array() > m3.array()).all() ); + } + + // comparisons to scalar + VERIFY( (m1.array() != (m1(r,c)+1) ).any() ); + VERIFY( (m1.array() > (m1(r,c)-1) ).any() ); + VERIFY( (m1.array() < (m1(r,c)+1) ).any() ); + VERIFY( (m1.array() == m1(r,c) ).any() ); + VERIFY( m1.cwiseEqual(m1(r,c)).any() ); + + // test Select + VERIFY_IS_APPROX( (m1.array()m2.array()).select(m1,m2), m1.cwiseMax(m2) ); + Scalar mid = (m1.cwiseAbs().minCoeff() + m1.cwiseAbs().maxCoeff())/Scalar(2); + for (int j=0; j=MatrixType::Constant(rows,cols,mid).array()) + .select(m1,0), m3); + // even shorter version: + VERIFY_IS_APPROX( (m1.array().abs()RealScalar(0.1)).count() == rows*cols); + + // and/or + VERIFY( ((m1.array()RealScalar(0)).matrix()).count() == 0); + VERIFY( ((m1.array()=RealScalar(0)).matrix()).count() == rows*cols); + RealScalar a = m1.cwiseAbs().mean(); + VERIFY( ((m1.array()<-a).matrix() || (m1.array()>a).matrix()).count() == (m1.cwiseAbs().array()>a).count()); + + typedef Matrix VectorOfIndices; + + // TODO allows colwise/rowwise for array + VERIFY_IS_APPROX(((m1.array().abs()+1)>RealScalar(0.1)).matrix().colwise().count(), VectorOfIndices::Constant(cols,rows).transpose()); + VERIFY_IS_APPROX(((m1.array().abs()+1)>RealScalar(0.1)).matrix().rowwise().count(), VectorOfIndices::Constant(rows, cols)); +} + +template void lpNorm(const VectorType& v) +{ + using std::sqrt; + typedef typename VectorType::RealScalar RealScalar; + VectorType u = VectorType::Random(v.size()); + + if(v.size()==0) + { + VERIFY_IS_APPROX(u.template lpNorm(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<1>(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<2>(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<5>(), RealScalar(0)); + } + else + { + VERIFY_IS_APPROX(u.template lpNorm(), u.cwiseAbs().maxCoeff()); + } + + VERIFY_IS_APPROX(u.template lpNorm<1>(), u.cwiseAbs().sum()); + VERIFY_IS_APPROX(u.template lpNorm<2>(), sqrt(u.array().abs().square().sum())); + 
VERIFY_IS_APPROX(numext::pow(u.template lpNorm<5>(), typename VectorType::RealScalar(5)), u.array().abs().pow(5).sum()); +} + +template void cwise_min_max(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols); + + // min/max with array + Scalar maxM1 = m1.maxCoeff(); + Scalar minM1 = m1.minCoeff(); + + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, minM1), m1.cwiseMin(MatrixType::Constant(rows,cols, minM1))); + VERIFY_IS_APPROX(m1, m1.cwiseMin(MatrixType::Constant(rows,cols, maxM1))); + + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, maxM1), m1.cwiseMax(MatrixType::Constant(rows,cols, maxM1))); + VERIFY_IS_APPROX(m1, m1.cwiseMax(MatrixType::Constant(rows,cols, minM1))); + + // min/max with scalar input + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, minM1), m1.cwiseMin( minM1)); + VERIFY_IS_APPROX(m1, m1.cwiseMin(maxM1)); + VERIFY_IS_APPROX(-m1, (-m1).cwiseMin(-minM1)); + VERIFY_IS_APPROX(-m1.array(), ((-m1).array().min)( -minM1)); + + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, maxM1), m1.cwiseMax( maxM1)); + VERIFY_IS_APPROX(m1, m1.cwiseMax(minM1)); + VERIFY_IS_APPROX(-m1, (-m1).cwiseMax(-maxM1)); + VERIFY_IS_APPROX(-m1.array(), ((-m1).array().max)(-maxM1)); + + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, minM1).array(), (m1.array().min)( minM1)); + VERIFY_IS_APPROX(m1.array(), (m1.array().min)( maxM1)); + + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, maxM1).array(), (m1.array().max)( maxM1)); + VERIFY_IS_APPROX(m1.array(), (m1.array().max)( minM1)); + +} + +template void resize(const MatrixTraits& t) +{ + typedef typename MatrixTraits::Index Index; + typedef typename MatrixTraits::Scalar Scalar; + typedef Matrix MatrixType; + typedef Array Array2DType; + typedef Matrix VectorType; + typedef Array Array1DType; + + Index rows = t.rows(), cols = t.cols(); + + MatrixType 
m(rows,cols); + VectorType v(rows); + Array2DType a2(rows,cols); + Array1DType a1(rows); + + m.array().resize(rows+1,cols+1); + VERIFY(m.rows()==rows+1 && m.cols()==cols+1); + a2.matrix().resize(rows+1,cols+1); + VERIFY(a2.rows()==rows+1 && a2.cols()==cols+1); + v.array().resize(cols); + VERIFY(v.size()==cols); + a1.matrix().resize(cols); + VERIFY(a1.size()==cols); +} + +void regression_bug_654() +{ + ArrayXf a = RowVectorXf(3); + VectorXf v = Array(3); +} + +void test_array_for_matrix() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( array_for_matrix(Matrix()) ); + CALL_SUBTEST_2( array_for_matrix(Matrix2f()) ); + CALL_SUBTEST_3( array_for_matrix(Matrix4d()) ); + CALL_SUBTEST_4( array_for_matrix(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_5( array_for_matrix(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( array_for_matrix(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( comparisons(Matrix()) ); + CALL_SUBTEST_2( comparisons(Matrix2f()) ); + CALL_SUBTEST_3( comparisons(Matrix4d()) ); + CALL_SUBTEST_5( comparisons(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( comparisons(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( cwise_min_max(Matrix()) ); + CALL_SUBTEST_2( cwise_min_max(Matrix2f()) ); + CALL_SUBTEST_3( cwise_min_max(Matrix4d()) ); + CALL_SUBTEST_5( cwise_min_max(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( cwise_min_max(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( 
lpNorm(Matrix()) ); + CALL_SUBTEST_2( lpNorm(Vector2f()) ); + CALL_SUBTEST_7( lpNorm(Vector3d()) ); + CALL_SUBTEST_8( lpNorm(Vector4f()) ); + CALL_SUBTEST_5( lpNorm(VectorXf(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_4( lpNorm(VectorXcf(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + CALL_SUBTEST_5( lpNorm(VectorXf(0)) ); + CALL_SUBTEST_4( lpNorm(VectorXcf(0)) ); + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_4( resize(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_5( resize(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( resize(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + CALL_SUBTEST_6( regression_bug_654() ); +} diff --git a/thirdparty/eigen/test/array_of_string.cpp b/thirdparty/eigen/test/array_of_string.cpp new file mode 100644 index 000000000..e23b7c59e --- /dev/null +++ b/thirdparty/eigen/test/array_of_string.cpp @@ -0,0 +1,32 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +void test_array_of_string() +{ + typedef Array ArrayXs; + ArrayXs a1(3), a2(3), a3(3), a3ref(3); + a1 << "one", "two", "three"; + a2 << "1", "2", "3"; + a3ref << "one (1)", "two (2)", "three (3)"; + std::stringstream s1; + s1 << a1; + VERIFY_IS_EQUAL(s1.str(), std::string(" one two three")); + a3 = a1 + std::string(" (") + a2 + std::string(")"); + VERIFY((a3==a3ref).all()); + + a3 = a1; + a3 += std::string(" (") + a2 + std::string(")"); + VERIFY((a3==a3ref).all()); + + a1.swap(a3); + VERIFY((a1==a3ref).all()); + VERIFY((a3!=a3ref).all()); +} diff --git a/thirdparty/eigen/test/array_replicate.cpp b/thirdparty/eigen/test/array_replicate.cpp new file mode 100644 index 000000000..779c8fc2f --- /dev/null +++ b/thirdparty/eigen/test/array_replicate.cpp @@ -0,0 +1,82 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void replicate(const MatrixType& m) +{ + /* this test covers the following files: + Replicate.cpp + */ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix MatrixX; + typedef Matrix VectorX; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols); + + VectorType v1 = VectorType::Random(rows); + + MatrixX x1, x2; + VectorX vx1; + + int f1 = internal::random(1,10), + f2 = internal::random(1,10); + + x1.resize(rows*f1,cols*f2); + for(int j=0; j())); + + x2.resize(rows,3*cols); + x2 << m2, m2, m2; + VERIFY_IS_APPROX(x2, (m2.template replicate<1,3>())); + + vx1.resize(3*rows,cols); + vx1 << m2, m2, m2; + VERIFY_IS_APPROX(vx1+vx1, vx1+(m2.template replicate<3,1>())); + + vx1=m2+(m2.colwise().replicate(1)); + + if(m2.cols()==1) + VERIFY_IS_APPROX(m2.coeff(0), (m2.template replicate<3,1>().coeff(m2.rows()))); + + x2.resize(rows,f1); + for (int j=0; j()) ); + CALL_SUBTEST_2( replicate(Vector2f()) ); + CALL_SUBTEST_3( replicate(Vector3d()) ); + CALL_SUBTEST_4( replicate(Vector4f()) ); + CALL_SUBTEST_5( replicate(VectorXf(16)) ); + CALL_SUBTEST_6( replicate(VectorXcd(10)) ); + } +} diff --git a/thirdparty/eigen/test/array_reverse.cpp b/thirdparty/eigen/test/array_reverse.cpp new file mode 100644 index 000000000..c9d9f90c3 --- /dev/null +++ b/thirdparty/eigen/test/array_reverse.cpp @@ -0,0 +1,146 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009 Ricard Marxer +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +using namespace std; + +template void reverse(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + + Index rows = m.rows(); + Index cols = m.cols(); + + // this test relies a lot on Random.h, and there's not much more that we can do + // to test it, hence I consider that we will have tested Random.h + MatrixType m1 = MatrixType::Random(rows, cols), m2; + VectorType v1 = VectorType::Random(rows); + + MatrixType m1_r = m1.reverse(); + // Verify that MatrixBase::reverse() works + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_r(i, j), m1(rows - 1 - i, cols - 1 - j)); + } + } + + Reverse m1_rd(m1); + // Verify that a Reverse default (in both directions) of an expression works + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_rd(i, j), m1(rows - 1 - i, cols - 1 - j)); + } + } + + Reverse m1_rb(m1); + // Verify that a Reverse in both directions of an expression works + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_rb(i, j), m1(rows - 1 - i, cols - 1 - j)); + } + } + + Reverse m1_rv(m1); + // Verify that a Reverse in the vertical directions of an expression works + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_rv(i, j), m1(rows - 1 - i, j)); + } + } + + Reverse m1_rh(m1); + // Verify that a Reverse in the horizontal directions of an expression works + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_rh(i, j), m1(i, cols - 1 - j)); + } + } + + VectorType v1_r = v1.reverse(); + // Verify that a VectorType::reverse() of an expression works + for ( int i = 0; i < rows; i++ ) { + VERIFY_IS_APPROX(v1_r(i), v1(rows - 1 - i)); + } + + MatrixType m1_cr = m1.colwise().reverse(); + // Verify that PartialRedux::reverse() works 
(for colwise()) + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_cr(i, j), m1(rows - 1 - i, j)); + } + } + + MatrixType m1_rr = m1.rowwise().reverse(); + // Verify that PartialRedux::reverse() works (for rowwise()) + for ( int i = 0; i < rows; i++ ) { + for ( int j = 0; j < cols; j++ ) { + VERIFY_IS_APPROX(m1_rr(i, j), m1(i, cols - 1 - j)); + } + } + + Scalar x = internal::random(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + m1.reverse()(r, c) = x; + VERIFY_IS_APPROX(x, m1(rows - 1 - r, cols - 1 - c)); + + m2 = m1; + m2.reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.reverse().eval()); + + m2 = m1; + m2.col(0).reverseInPlace(); + VERIFY_IS_APPROX(m2.col(0),m1.col(0).reverse().eval()); + + m2 = m1; + m2.row(0).reverseInPlace(); + VERIFY_IS_APPROX(m2.row(0),m1.row(0).reverse().eval()); + + m2 = m1; + m2.rowwise().reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.rowwise().reverse().eval()); + + m2 = m1; + m2.colwise().reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.colwise().reverse().eval()); + + m1.colwise().reverse()(r, c) = x; + VERIFY_IS_APPROX(x, m1(rows - 1 - r, c)); + + m1.rowwise().reverse()(r, c) = x; + VERIFY_IS_APPROX(x, m1(r, cols - 1 - c)); +} + +void test_array_reverse() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( reverse(Matrix()) ); + CALL_SUBTEST_2( reverse(Matrix2f()) ); + CALL_SUBTEST_3( reverse(Matrix4f()) ); + CALL_SUBTEST_4( reverse(Matrix4d()) ); + CALL_SUBTEST_5( reverse(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( reverse(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( reverse(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( reverse(Matrix()) ); + CALL_SUBTEST_9( reverse(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + 
} +#ifdef EIGEN_TEST_PART_3 + Vector4f x; x << 1, 2, 3, 4; + Vector4f y; y << 4, 3, 2, 1; + VERIFY(x.reverse()[1] == 3); + VERIFY(x.reverse() == y); +#endif +} diff --git a/thirdparty/eigen/test/bandmatrix.cpp b/thirdparty/eigen/test/bandmatrix.cpp new file mode 100644 index 000000000..f8c38f7c3 --- /dev/null +++ b/thirdparty/eigen/test/bandmatrix.cpp @@ -0,0 +1,71 @@ +// This file is triangularView of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void bandmatrix(const MatrixType& _m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix DenseMatrixType; + + Index rows = _m.rows(); + Index cols = _m.cols(); + Index supers = _m.supers(); + Index subs = _m.subs(); + + MatrixType m(rows,cols,supers,subs); + + DenseMatrixType dm1(rows,cols); + dm1.setZero(); + + m.diagonal().setConstant(123); + dm1.diagonal().setConstant(123); + for (int i=1; i<=m.supers();++i) + { + m.diagonal(i).setConstant(static_cast(i)); + dm1.diagonal(i).setConstant(static_cast(i)); + } + for (int i=1; i<=m.subs();++i) + { + m.diagonal(-i).setConstant(-static_cast(i)); + dm1.diagonal(-i).setConstant(-static_cast(i)); + } + //std::cerr << m.m_data << "\n\n" << m.toDense() << "\n\n" << dm1 << "\n\n\n\n"; + VERIFY_IS_APPROX(dm1,m.toDenseMatrix()); + + for (int i=0; i(i+1)); + dm1.col(i).setConstant(static_cast(i+1)); + } + Index d = (std::min)(rows,cols); + Index a = std::max(0,cols-d-supers); + Index b = std::max(0,rows-d-subs); + if(a>0) dm1.block(0,d+supers,rows,a).setZero(); + dm1.block(0,supers+1,cols-supers-1-a,cols-supers-1-a).template triangularView().setZero(); + dm1.block(subs+1,0,rows-subs-1-b,rows-subs-1-b).template 
triangularView().setZero(); + if(b>0) dm1.block(d+subs,0,b,cols).setZero(); + //std::cerr << m.m_data << "\n\n" << m.toDense() << "\n\n" << dm1 << "\n\n"; + VERIFY_IS_APPROX(dm1,m.toDenseMatrix()); + +} + +using Eigen::internal::BandMatrix; + +void test_bandmatrix() +{ + for(int i = 0; i < 10*g_repeat ; i++) { + Index rows = internal::random(1,10); + Index cols = internal::random(1,10); + Index sups = internal::random(0,cols-1); + Index subs = internal::random(0,rows-1); + CALL_SUBTEST(bandmatrix(BandMatrix(rows,cols,sups,subs)) ); + } +} diff --git a/thirdparty/eigen/test/basicstuff.cpp b/thirdparty/eigen/test/basicstuff.cpp new file mode 100644 index 000000000..99d91f9da --- /dev/null +++ b/thirdparty/eigen/test/basicstuff.cpp @@ -0,0 +1,280 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_STATIC_ASSERT + +#include "main.h" + +template void basicStuff(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix SquareMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + // this test relies a lot on Random.h, and there's not much more that we can do + // to test it, hence I consider that we will have tested Random.h + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + mzero = MatrixType::Zero(rows, cols), + square = Matrix::Random(rows, rows); + VectorType v1 = VectorType::Random(rows), + vzero = VectorType::Zero(rows); + SquareMatrixType sm1 = SquareMatrixType::Random(rows,rows), sm2(rows,rows); + + Scalar x = 0; + while(x == Scalar(0)) x = internal::random(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + m1.coeffRef(r,c) = x; + VERIFY_IS_APPROX(x, m1.coeff(r,c)); + m1(r,c) = x; + VERIFY_IS_APPROX(x, m1(r,c)); + v1.coeffRef(r) = x; + VERIFY_IS_APPROX(x, v1.coeff(r)); + v1(r) = x; + VERIFY_IS_APPROX(x, v1(r)); + v1[r] = x; + VERIFY_IS_APPROX(x, v1[r]); + + VERIFY_IS_APPROX( v1, v1); + VERIFY_IS_NOT_APPROX( v1, 2*v1); + VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1); + VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1.squaredNorm()); + VERIFY_IS_NOT_MUCH_SMALLER_THAN(v1, v1); + VERIFY_IS_APPROX( vzero, v1-v1); + VERIFY_IS_APPROX( m1, m1); + VERIFY_IS_NOT_APPROX( m1, 2*m1); + VERIFY_IS_MUCH_SMALLER_THAN( mzero, m1); + VERIFY_IS_NOT_MUCH_SMALLER_THAN(m1, m1); + VERIFY_IS_APPROX( mzero, m1-m1); + + // always test operator() on each read-only expression class, + // in order to check const-qualifiers. 
+ // indeed, if an expression class (here Zero) is meant to be read-only, + // hence has no _write() method, the corresponding MatrixBase method (here zero()) + // should return a const-qualified object so that it is the const-qualified + // operator() that gets called, which in turn calls _read(). + VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows,cols)(r,c), static_cast(1)); + + // now test copying a row-vector into a (column-)vector and conversely. + square.col(r) = square.row(r).eval(); + Matrix rv(rows); + Matrix cv(rows); + rv = square.row(r); + cv = square.col(r); + + VERIFY_IS_APPROX(rv, cv.transpose()); + + if(cols!=1 && rows!=1 && MatrixType::SizeAtCompileTime!=Dynamic) + { + VERIFY_RAISES_ASSERT(m1 = (m2.block(0,0, rows-1, cols-1))); + } + + if(cols!=1 && rows!=1) + { + VERIFY_RAISES_ASSERT(m1[0]); + VERIFY_RAISES_ASSERT((m1+m1)[0]); + } + + VERIFY_IS_APPROX(m3 = m1,m1); + MatrixType m4; + VERIFY_IS_APPROX(m4 = m1,m1); + + m3.real() = m1.real(); + VERIFY_IS_APPROX(static_cast(m3).real(), static_cast(m1).real()); + VERIFY_IS_APPROX(static_cast(m3).real(), m1.real()); + + // check == / != operators + VERIFY(m1==m1); + VERIFY(m1!=m2); + VERIFY(!(m1==m2)); + VERIFY(!(m1!=m1)); + m1 = m2; + VERIFY(m1==m2); + VERIFY(!(m1!=m2)); + + // check automatic transposition + sm2.setZero(); + for(typename MatrixType::Index i=0;i(0,10)>5; + m3 = b ? m1 : m2; + if(b) VERIFY_IS_APPROX(m3,m1); + else VERIFY_IS_APPROX(m3,m2); + m3 = b ? -m1 : m2; + if(b) VERIFY_IS_APPROX(m3,-m1); + else VERIFY_IS_APPROX(m3,m2); + m3 = b ? 
m1 : -m2; + if(b) VERIFY_IS_APPROX(m3,m1); + else VERIFY_IS_APPROX(m3,-m2); + } +} + +template void basicStuffComplex(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix RealMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + Scalar s1 = internal::random(), + s2 = internal::random(); + + VERIFY(numext::real(s1)==numext::real_ref(s1)); + VERIFY(numext::imag(s1)==numext::imag_ref(s1)); + numext::real_ref(s1) = numext::real(s2); + numext::imag_ref(s1) = numext::imag(s2); + VERIFY(internal::isApprox(s1, s2, NumTraits::epsilon())); + // extended precision in Intel FPUs means that s1 == s2 in the line above is not guaranteed. + + RealMatrixType rm1 = RealMatrixType::Random(rows,cols), + rm2 = RealMatrixType::Random(rows,cols); + MatrixType cm(rows,cols); + cm.real() = rm1; + cm.imag() = rm2; + VERIFY_IS_APPROX(static_cast(cm).real(), rm1); + VERIFY_IS_APPROX(static_cast(cm).imag(), rm2); + rm1.setZero(); + rm2.setZero(); + rm1 = cm.real(); + rm2 = cm.imag(); + VERIFY_IS_APPROX(static_cast(cm).real(), rm1); + VERIFY_IS_APPROX(static_cast(cm).imag(), rm2); + cm.real().setZero(); + VERIFY(static_cast(cm).real().isZero()); + VERIFY(!static_cast(cm).imag().isZero()); +} + +#ifdef EIGEN_TEST_PART_2 +void casting() +{ + Matrix4f m = Matrix4f::Random(), m2; + Matrix4d n = m.cast(); + VERIFY(m.isApprox(n.cast())); + m2 = m.cast(); // check the specialization when NewType == Type + VERIFY(m.isApprox(m2)); +} +#endif + +template +void fixedSizeMatrixConstruction() +{ + Scalar raw[4]; + for(int k=0; k<4; ++k) + raw[k] = internal::random(); + + { + Matrix m(raw); + Array a(raw); + for(int k=0; k<4; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<4; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1],raw[2],raw[3]))); + VERIFY((a==(Array(raw[0],raw[1],raw[2],raw[3]))).all()); + } + { + Matrix m(raw); + Array a(raw); + for(int 
k=0; k<3; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<3; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1],raw[2]))); + VERIFY((a==Array(raw[0],raw[1],raw[2])).all()); + } + { + Matrix m(raw), m2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ); + Array a(raw), a2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ); + for(int k=0; k<2; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<2; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1]))); + VERIFY((a==Array(raw[0],raw[1])).all()); + for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k])); + for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k])); + } + { + Matrix m(raw), + m2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ), + m3( (int(raw[0])), (int(raw[1])) ), + m4( (float(raw[0])), (float(raw[1])) ); + Array a(raw), a2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ); + for(int k=0; k<2; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<2; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1]))); + VERIFY((a==Array(raw[0],raw[1])).all()); + for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k])); + for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k])); + for(int k=0; k<2; ++k) VERIFY(m3(k) == int(raw[k])); + for(int k=0; k<2; ++k) VERIFY((m4(k)) == Scalar(float(raw[k]))); + } + { + Matrix m(raw), m1(raw[0]), m2( (DenseIndex(raw[0])) ), m3( (int(raw[0])) ); + Array a(raw), a1(raw[0]), a2( (DenseIndex(raw[0])) ); + VERIFY(m(0) == raw[0]); + VERIFY(a(0) == raw[0]); + VERIFY(m1(0) == raw[0]); + VERIFY(a1(0) == raw[0]); + VERIFY(m2(0) == DenseIndex(raw[0])); + VERIFY(a2(0) == DenseIndex(raw[0])); + VERIFY(m3(0) == int(raw[0])); + VERIFY_IS_EQUAL(m,(Matrix(raw[0]))); + VERIFY((a==Array(raw[0])).all()); + } +} + +void test_basicstuff() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( basicStuff(Matrix()) ); + CALL_SUBTEST_2( basicStuff(Matrix4d()) ); + CALL_SUBTEST_3( basicStuff(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), 
internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_4( basicStuff(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_5( basicStuff(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( basicStuff(Matrix()) ); + CALL_SUBTEST_7( basicStuff(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + + CALL_SUBTEST_3( basicStuffComplex(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_5( basicStuffComplex(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + + CALL_SUBTEST_2(casting()); +} diff --git a/thirdparty/eigen/test/bdcsvd.cpp b/thirdparty/eigen/test/bdcsvd.cpp new file mode 100644 index 000000000..f9f687aac --- /dev/null +++ b/thirdparty/eigen/test/bdcsvd.cpp @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Gauthier Brun +// Copyright (C) 2013 Nicolas Carre +// Copyright (C) 2013 Jean Ceccato +// Copyright (C) 2013 Pierre Zoppitelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/ + +// discard stack allocation as that too bypasses malloc +#define EIGEN_STACK_ALLOCATION_LIMIT 0 +#define EIGEN_RUNTIME_NO_MALLOC + +#include "main.h" +#include +#include +#include + + +#define SVD_DEFAULT(M) BDCSVD +#define SVD_FOR_MIN_NORM(M) BDCSVD +#include "svd_common.h" + +// Check all variants of JacobiSVD +template +void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true) +{ + MatrixType m = a; + if(pickrandom) + svd_fill_random(m); + + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); +} + +template +void bdcsvd_method() +{ + enum { Size = MatrixType::RowsAtCompileTime }; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix RealVecType; + MatrixType m = MatrixType::Identity(); + VERIFY_IS_APPROX(m.bdcSvd().singularValues(), RealVecType::Ones()); + VERIFY_RAISES_ASSERT(m.bdcSvd().matrixU()); + VERIFY_RAISES_ASSERT(m.bdcSvd().matrixV()); + VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).solve(m), m); +} + +// compare the Singular values returned with Jacobi and Bdc +template +void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0) +{ + MatrixType m = MatrixType::Random(a.rows(), a.cols()); + BDCSVD bdc_svd(m); + JacobiSVD jacobi_svd(m); + VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues()); + if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); + if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); + if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); + if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); +} + +void test_bdcsvd() +{ + CALL_SUBTEST_3(( svd_verify_assert >(Matrix3f()) )); + CALL_SUBTEST_4(( svd_verify_assert >(Matrix4d()) )); + CALL_SUBTEST_7(( 
svd_verify_assert >(MatrixXf(10,12)) )); + CALL_SUBTEST_8(( svd_verify_assert >(MatrixXcd(7,5)) )); + + CALL_SUBTEST_101(( svd_all_trivial_2x2(bdcsvd) )); + CALL_SUBTEST_102(( svd_all_trivial_2x2(bdcsvd) )); + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_3(( bdcsvd() )); + CALL_SUBTEST_4(( bdcsvd() )); + CALL_SUBTEST_5(( bdcsvd >() )); + + int r = internal::random(1, EIGEN_TEST_MAX_SIZE/2), + c = internal::random(1, EIGEN_TEST_MAX_SIZE/2); + + TEST_SET_BUT_UNUSED_VARIABLE(r) + TEST_SET_BUT_UNUSED_VARIABLE(c) + + CALL_SUBTEST_6(( bdcsvd(Matrix(r,2)) )); + CALL_SUBTEST_7(( bdcsvd(MatrixXf(r,c)) )); + CALL_SUBTEST_7(( compare_bdc_jacobi(MatrixXf(r,c)) )); + CALL_SUBTEST_10(( bdcsvd(MatrixXd(r,c)) )); + CALL_SUBTEST_10(( compare_bdc_jacobi(MatrixXd(r,c)) )); + CALL_SUBTEST_8(( bdcsvd(MatrixXcd(r,c)) )); + CALL_SUBTEST_8(( compare_bdc_jacobi(MatrixXcd(r,c)) )); + + // Test on inf/nan matrix + CALL_SUBTEST_7( (svd_inf_nan, MatrixXf>()) ); + CALL_SUBTEST_10( (svd_inf_nan, MatrixXd>()) ); + } + + // test matrixbase method + CALL_SUBTEST_1(( bdcsvd_method() )); + CALL_SUBTEST_3(( bdcsvd_method() )); + + // Test problem size constructors + CALL_SUBTEST_7( BDCSVD(10,10) ); + + // Check that preallocation avoids subsequent mallocs + CALL_SUBTEST_9( svd_preallocate() ); + + CALL_SUBTEST_2( svd_underoverflow() ); +} + diff --git a/thirdparty/eigen/test/bicgstab.cpp b/thirdparty/eigen/test/bicgstab.cpp new file mode 100644 index 000000000..4cc0dd31c --- /dev/null +++ b/thirdparty/eigen/test/bicgstab.cpp @@ -0,0 +1,34 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "sparse_solver.h" +#include + +template void test_bicgstab_T() +{ + BiCGSTAB, DiagonalPreconditioner > bicgstab_colmajor_diag; + BiCGSTAB, IdentityPreconditioner > bicgstab_colmajor_I; + BiCGSTAB, IncompleteLUT > bicgstab_colmajor_ilut; + //BiCGSTAB, SSORPreconditioner > bicgstab_colmajor_ssor; + + bicgstab_colmajor_diag.setTolerance(NumTraits::epsilon()*4); + bicgstab_colmajor_ilut.setTolerance(NumTraits::epsilon()*4); + + CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_diag) ); +// CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_I) ); + CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_ilut) ); + //CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_ssor) ); +} + +void test_bicgstab() +{ + CALL_SUBTEST_1((test_bicgstab_T()) ); + CALL_SUBTEST_2((test_bicgstab_T, int>())); + CALL_SUBTEST_3((test_bicgstab_T())); +} diff --git a/thirdparty/eigen/test/block.cpp b/thirdparty/eigen/test/block.cpp new file mode 100644 index 000000000..1eeb2da27 --- /dev/null +++ b/thirdparty/eigen/test/block.cpp @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_STATIC_ASSERT // otherwise we fail at compile time on unused paths +#include "main.h" + +template +typename Eigen::internal::enable_if::IsComplex,typename MatrixType::Scalar>::type +block_real_only(const MatrixType &m1, Index r1, Index r2, Index c1, Index c2, const Scalar& s1) { + // check cwise-Functions: + VERIFY_IS_APPROX(m1.row(r1).cwiseMax(s1), m1.cwiseMax(s1).row(r1)); + VERIFY_IS_APPROX(m1.col(c1).cwiseMin(s1), m1.cwiseMin(s1).col(c1)); + + VERIFY_IS_APPROX(m1.block(r1,c1,r2-r1+1,c2-c1+1).cwiseMin(s1), m1.cwiseMin(s1).block(r1,c1,r2-r1+1,c2-c1+1)); + VERIFY_IS_APPROX(m1.block(r1,c1,r2-r1+1,c2-c1+1).cwiseMax(s1), m1.cwiseMax(s1).block(r1,c1,r2-r1+1,c2-c1+1)); + + return Scalar(0); +} + +template +typename Eigen::internal::enable_if::IsComplex,typename MatrixType::Scalar>::type +block_real_only(const MatrixType &, Index, Index, Index, Index, const Scalar&) { + return Scalar(0); +} + + +template void block(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix VectorType; + typedef Matrix RowVectorType; + typedef Matrix DynamicMatrixType; + typedef Matrix DynamicVectorType; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m1_copy = m1, + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + ones = MatrixType::Ones(rows, cols); + VectorType v1 = VectorType::Random(rows); + + Scalar s1 = internal::random(); + + Index r1 = internal::random(0,rows-1); + Index r2 = internal::random(r1,rows-1); + Index c1 = internal::random(0,cols-1); + Index c2 = internal::random(c1,cols-1); + + block_real_only(m1, r1, r2, c1, c1, s1); + + //check row() and col() + VERIFY_IS_EQUAL(m1.col(c1).transpose(), m1.transpose().row(c1)); + //check operator(), both constant and non-constant, on row() and col() + m1 = m1_copy; + m1.row(r1) += s1 * m1_copy.row(r2); + 
VERIFY_IS_APPROX(m1.row(r1), m1_copy.row(r1) + s1 * m1_copy.row(r2)); + // check nested block xpr on lhs + m1.row(r1).row(0) += s1 * m1_copy.row(r2); + VERIFY_IS_APPROX(m1.row(r1), m1_copy.row(r1) + Scalar(2) * s1 * m1_copy.row(r2)); + m1 = m1_copy; + m1.col(c1) += s1 * m1_copy.col(c2); + VERIFY_IS_APPROX(m1.col(c1), m1_copy.col(c1) + s1 * m1_copy.col(c2)); + m1.col(c1).col(0) += s1 * m1_copy.col(c2); + VERIFY_IS_APPROX(m1.col(c1), m1_copy.col(c1) + Scalar(2) * s1 * m1_copy.col(c2)); + + + //check block() + Matrix b1(1,1); b1(0,0) = m1(r1,c1); + + RowVectorType br1(m1.block(r1,0,1,cols)); + VectorType bc1(m1.block(0,c1,rows,1)); + VERIFY_IS_EQUAL(b1, m1.block(r1,c1,1,1)); + VERIFY_IS_EQUAL(m1.row(r1), br1); + VERIFY_IS_EQUAL(m1.col(c1), bc1); + //check operator(), both constant and non-constant, on block() + m1.block(r1,c1,r2-r1+1,c2-c1+1) = s1 * m2.block(0, 0, r2-r1+1,c2-c1+1); + m1.block(r1,c1,r2-r1+1,c2-c1+1)(r2-r1,c2-c1) = m2.block(0, 0, r2-r1+1,c2-c1+1)(0,0); + + enum { + BlockRows = 2, + BlockCols = 5 + }; + if (rows>=5 && cols>=8) + { + // test fixed block() as lvalue + m1.template block(1,1) *= s1; + // test operator() on fixed block() both as constant and non-constant + m1.template block(1,1)(0, 3) = m1.template block<2,5>(1,1)(1,2); + // check that fixed block() and block() agree + Matrix b = m1.template block(3,3); + VERIFY_IS_EQUAL(b, m1.block(3,3,BlockRows,BlockCols)); + + // same tests with mixed fixed/dynamic size + m1.template block(1,1,BlockRows,BlockCols) *= s1; + m1.template block(1,1,BlockRows,BlockCols)(0,3) = m1.template block<2,5>(1,1)(1,2); + Matrix b2 = m1.template block(3,3,2,5); + VERIFY_IS_EQUAL(b2, m1.block(3,3,BlockRows,BlockCols)); + } + + if (rows>2) + { + // test sub vectors + VERIFY_IS_EQUAL(v1.template head<2>(), v1.block(0,0,2,1)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.head(2)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.segment(0,2)); + VERIFY_IS_EQUAL(v1.template head<2>(), v1.template segment<2>(0)); + Index i = rows-2; 
+ VERIFY_IS_EQUAL(v1.template tail<2>(), v1.block(i,0,2,1)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.tail(2)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.segment(i,2)); + VERIFY_IS_EQUAL(v1.template tail<2>(), v1.template segment<2>(i)); + i = internal::random(0,rows-2); + VERIFY_IS_EQUAL(v1.segment(i,2), v1.template segment<2>(i)); + } + + // stress some basic stuffs with block matrices + VERIFY(numext::real(ones.col(c1).sum()) == RealScalar(rows)); + VERIFY(numext::real(ones.row(r1).sum()) == RealScalar(cols)); + + VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows)); + VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols)); + + // check that linear accessors work on blocks + m1 = m1_copy; + if((MatrixType::Flags&RowMajorBit)==0) + VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1)); + else + VERIFY_IS_EQUAL(m1.topRows(r1).coeff(c1+r1*cols), m1(r1,c1)); + + + // now test some block-inside-of-block. + + // expressions with direct access + VERIFY_IS_EQUAL( (m1.block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , (m1.block(r2,c2,rows-r2,cols-c2)) ); + VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , (m1.row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , (m1.col(c1).segment(r1,r2-r1+1)) ); + VERIFY_IS_EQUAL( (m1.block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , (m1.row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_EQUAL( (m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , (m1.row(r1).segment(c1,c2-c1+1)).transpose() ); + + // expressions without direct access + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) ); + VERIFY_IS_APPROX( 
((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + + // evaluation into plain matrices from expressions with direct access (stress MapBase) + DynamicMatrixType dm; + DynamicVectorType dv; + dm.setZero(); + dm = m1.block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2); + VERIFY_IS_EQUAL(dm, (m1.block(r2,c2,rows-r2,cols-c2))); + dm.setZero(); + dv.setZero(); + dm = m1.block(r1,c1,r2-r1+1,c2-c1+1).row(0).transpose(); + dv = m1.row(r1).segment(c1,c2-c1+1); + VERIFY_IS_EQUAL(dv, dm); + dm.setZero(); + dv.setZero(); + dm = m1.col(c1).segment(r1,r2-r1+1); + dv = m1.block(r1,c1,r2-r1+1,c2-c1+1).col(0); + VERIFY_IS_EQUAL(dv, dm); + dm.setZero(); + dv.setZero(); + dm = m1.block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0); + dv = m1.row(r1).segment(c1,c2-c1+1); + VERIFY_IS_EQUAL(dv, dm); + dm.setZero(); + dv.setZero(); + dm = m1.row(r1).segment(c1,c2-c1+1).transpose(); + dv = m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0); + VERIFY_IS_EQUAL(dv, dm); + + VERIFY_IS_EQUAL( (m1.template block(1,0,0,1)), m1.block(1,0,0,1)); + VERIFY_IS_EQUAL( (m1.template block<1,Dynamic>(0,1,1,0)), m1.block(0,1,1,0)); + VERIFY_IS_EQUAL( ((m1*1).template block(1,0,0,1)), m1.block(1,0,0,1)); + VERIFY_IS_EQUAL( ((m1*1).template block<1,Dynamic>(0,1,1,0)), m1.block(0,1,1,0)); +} + + +template +void compare_using_data_and_stride(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + Index size = m.size(); + Index innerStride = m.innerStride(); + Index outerStride = m.outerStride(); + Index rowStride = m.rowStride(); + Index colStride = m.colStride(); + const typename MatrixType::Scalar* data = m.data(); + + for(int j=0;j +void data_and_stride(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + Index rows = 
m.rows(); + Index cols = m.cols(); + + Index r1 = internal::random(0,rows-1); + Index r2 = internal::random(r1,rows-1); + Index c1 = internal::random(0,cols-1); + Index c2 = internal::random(c1,cols-1); + + MatrixType m1 = MatrixType::Random(rows, cols); + compare_using_data_and_stride(m1.block(r1, c1, r2-r1+1, c2-c1+1)); + compare_using_data_and_stride(m1.transpose().block(c1, r1, c2-c1+1, r2-r1+1)); + compare_using_data_and_stride(m1.row(r1)); + compare_using_data_and_stride(m1.col(c1)); + compare_using_data_and_stride(m1.row(r1).transpose()); + compare_using_data_and_stride(m1.col(c1).transpose()); +} + +void test_block() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( block(Matrix()) ); + CALL_SUBTEST_2( block(Matrix4d()) ); + CALL_SUBTEST_3( block(MatrixXcf(3, 3)) ); + CALL_SUBTEST_4( block(MatrixXi(8, 12)) ); + CALL_SUBTEST_5( block(MatrixXcd(20, 20)) ); + CALL_SUBTEST_6( block(MatrixXf(20, 20)) ); + + CALL_SUBTEST_8( block(Matrix(3, 4)) ); + +#ifndef EIGEN_DEFAULT_TO_ROW_MAJOR + CALL_SUBTEST_6( data_and_stride(MatrixXf(internal::random(5,50), internal::random(5,50))) ); + CALL_SUBTEST_7( data_and_stride(Matrix(internal::random(5,50), internal::random(5,50))) ); +#endif + } +} diff --git a/thirdparty/eigen/test/boostmultiprec.cpp b/thirdparty/eigen/test/boostmultiprec.cpp new file mode 100644 index 000000000..e06e9bdaf --- /dev/null +++ b/thirdparty/eigen/test/boostmultiprec.cpp @@ -0,0 +1,201 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include + +#ifdef EIGEN_TEST_MAX_SIZE +#undef EIGEN_TEST_MAX_SIZE +#endif + +#define EIGEN_TEST_MAX_SIZE 50 + +#ifdef EIGEN_TEST_PART_1 +#include "cholesky.cpp" +#endif + +#ifdef EIGEN_TEST_PART_2 +#include "lu.cpp" +#endif + +#ifdef EIGEN_TEST_PART_3 +#include "qr.cpp" +#endif + +#ifdef EIGEN_TEST_PART_4 +#include "qr_colpivoting.cpp" +#endif + +#ifdef EIGEN_TEST_PART_5 +#include "qr_fullpivoting.cpp" +#endif + +#ifdef EIGEN_TEST_PART_6 +#include "eigensolver_selfadjoint.cpp" +#endif + +#ifdef EIGEN_TEST_PART_7 +#include "eigensolver_generic.cpp" +#endif + +#ifdef EIGEN_TEST_PART_8 +#include "eigensolver_generalized_real.cpp" +#endif + +#ifdef EIGEN_TEST_PART_9 +#include "jacobisvd.cpp" +#endif + +#ifdef EIGEN_TEST_PART_10 +#include "bdcsvd.cpp" +#endif + +#include + +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite + +#include +#include +#include +#include + +namespace mp = boost::multiprecision; +typedef mp::number, mp::et_on> Real; + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits { + static inline Real dummy_precision() { return 1e-50; } + }; + + template + struct NumTraits > : NumTraits {}; + + template<> + Real test_precision() { return 1e-50; } + + // needed in C++03 mode where number does not support explicit cast. 
+ namespace internal { + template + struct cast_impl { + static inline NewType run(const Real& x) { + return x.template convert_to(); + } + }; + + template<> + struct cast_impl > { + static inline std::complex run(const Real& x) { + return std::complex(x); + } + }; + } +} + +namespace boost { +namespace multiprecision { + // to make ADL work as expected: + using boost::math::isfinite; + using boost::math::isnan; + using boost::math::isinf; + using boost::math::copysign; + using boost::math::hypot; + + // The following is needed for std::complex: + Real fabs(const Real& a) { return abs EIGEN_NOT_A_MACRO (a); } + Real fmax(const Real& a, const Real& b) { using std::max; return max(a,b); } + + // some specialization for the unit tests: + inline bool test_isMuchSmallerThan(const Real& a, const Real& b) { + return internal::isMuchSmallerThan(a, b, test_precision()); + } + + inline bool test_isApprox(const Real& a, const Real& b) { + return internal::isApprox(a, b, test_precision()); + } + + inline bool test_isApproxOrLessThan(const Real& a, const Real& b) { + return internal::isApproxOrLessThan(a, b, test_precision()); + } + + Real get_test_precision(const Real&) { + return test_precision(); + } + + Real test_relative_error(const Real &a, const Real &b) { + using Eigen::numext::abs2; + return sqrt(abs2(a-b)/Eigen::numext::mini(abs2(a),abs2(b))); + } +} +} + +namespace Eigen { + +} + +void test_boostmultiprec() +{ + typedef Matrix Mat; + typedef Matrix,Dynamic,Dynamic> MatC; + + std::cout << "NumTraits::epsilon() = " << NumTraits::epsilon() << std::endl; + std::cout << "NumTraits::dummy_precision() = " << NumTraits::dummy_precision() << std::endl; + std::cout << "NumTraits::lowest() = " << NumTraits::lowest() << std::endl; + std::cout << "NumTraits::highest() = " << NumTraits::highest() << std::endl; + std::cout << "NumTraits::digits10() = " << NumTraits::digits10() << std::endl; + + // check stream output + { + Mat A(10,10); + A.setRandom(); + std::stringstream ss; + 
ss << A; + } + { + MatC A(10,10); + A.setRandom(); + std::stringstream ss; + ss << A; + } + + for(int i = 0; i < g_repeat; i++) { + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); + + CALL_SUBTEST_1( cholesky(Mat(s,s)) ); + + CALL_SUBTEST_2( lu_non_invertible() ); + CALL_SUBTEST_2( lu_invertible() ); + CALL_SUBTEST_2( lu_non_invertible() ); + CALL_SUBTEST_2( lu_invertible() ); + + CALL_SUBTEST_3( qr(Mat(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_3( qr_invertible() ); + + CALL_SUBTEST_4( qr() ); + CALL_SUBTEST_4( cod() ); + CALL_SUBTEST_4( qr_invertible() ); + + CALL_SUBTEST_5( qr() ); + CALL_SUBTEST_5( qr_invertible() ); + + CALL_SUBTEST_6( selfadjointeigensolver(Mat(s,s)) ); + + CALL_SUBTEST_7( eigensolver(Mat(s,s)) ); + + CALL_SUBTEST_8( generalized_eigensolver_real(Mat(s,s)) ); + + TEST_SET_BUT_UNUSED_VARIABLE(s) + } + + CALL_SUBTEST_9(( jacobisvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + CALL_SUBTEST_10(( bdcsvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); +} + diff --git a/thirdparty/eigen/test/bug1213.cpp b/thirdparty/eigen/test/bug1213.cpp new file mode 100644 index 000000000..581760c1a --- /dev/null +++ b/thirdparty/eigen/test/bug1213.cpp @@ -0,0 +1,13 @@ + +// This anonymous enum is essential to trigger the linking issue +enum { + Foo +}; + +#include "bug1213.h" + +bool bug1213_1(const Eigen::Vector3f& x) +{ + return bug1213_2(x); +} + diff --git a/thirdparty/eigen/test/bug1213.h b/thirdparty/eigen/test/bug1213.h new file mode 100644 index 000000000..040e5a470 --- /dev/null +++ b/thirdparty/eigen/test/bug1213.h @@ -0,0 +1,8 @@ + +#include + +template +bool bug1213_2(const Eigen::Matrix& x); + +bool bug1213_1(const Eigen::Vector3f& x); + diff --git a/thirdparty/eigen/test/bug1213_main.cpp 
b/thirdparty/eigen/test/bug1213_main.cpp new file mode 100644 index 000000000..4802c0003 --- /dev/null +++ b/thirdparty/eigen/test/bug1213_main.cpp @@ -0,0 +1,18 @@ + +// This is a regression unit regarding a weird linking issue with gcc. + +#include "bug1213.h" + +int main() +{ + return 0; +} + + +template +bool bug1213_2(const Eigen::Matrix& ) +{ + return true; +} + +template bool bug1213_2(const Eigen::Vector3f&); diff --git a/thirdparty/eigen/test/cholesky.cpp b/thirdparty/eigen/test/cholesky.cpp new file mode 100644 index 000000000..8ad5ac639 --- /dev/null +++ b/thirdparty/eigen/test/cholesky.cpp @@ -0,0 +1,509 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NO_ASSERTION_CHECKING +#define EIGEN_NO_ASSERTION_CHECKING +#endif + +#define TEST_ENABLE_TEMPORARY_TRACKING + +#include "main.h" +#include +#include + +template +typename MatrixType::RealScalar matrix_l1_norm(const MatrixType& m) { + MatrixType symm = m.template selfadjointView(); + return symm.cwiseAbs().colwise().sum().maxCoeff(); +} + +template class CholType> void test_chol_update(const MatrixType& symm) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix VectorType; + + MatrixType symmLo = symm.template triangularView(); + MatrixType symmUp = symm.template triangularView(); + MatrixType symmCpy = symm; + + CholType chollo(symmLo); + CholType cholup(symmUp); + + for (int k=0; k<10; ++k) + { + VectorType vec = VectorType::Random(symm.rows()); + RealScalar sigma = internal::random(); + symmCpy += sigma * vec * vec.adjoint(); + + // we are doing some downdates, so it might be the case that the matrix is not SPD anymore + CholType 
chol(symmCpy); + if(chol.info()!=Success) + break; + + chollo.rankUpdate(vec, sigma); + VERIFY_IS_APPROX(symmCpy, chollo.reconstructedMatrix()); + + cholup.rankUpdate(vec, sigma); + VERIFY_IS_APPROX(symmCpy, cholup.reconstructedMatrix()); + } +} + +template void cholesky(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + /* this test covers the following files: + LLT.h LDLT.h + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix SquareMatrixType; + typedef Matrix VectorType; + + MatrixType a0 = MatrixType::Random(rows,cols); + VectorType vecB = VectorType::Random(rows), vecX(rows); + MatrixType matB = MatrixType::Random(rows,cols), matX(rows,cols); + SquareMatrixType symm = a0 * a0.adjoint(); + // let's make sure the matrix is not singular or near singular + for (int k=0; k<3; ++k) + { + MatrixType a1 = MatrixType::Random(rows,cols); + symm += a1 * a1.adjoint(); + } + + { + SquareMatrixType symmUp = symm.template triangularView(); + SquareMatrixType symmLo = symm.template triangularView(); + + LLT chollo(symmLo); + VERIFY_IS_APPROX(symm, chollo.reconstructedMatrix()); + vecX = chollo.solve(vecB); + VERIFY_IS_APPROX(symm * vecX, vecB); + matX = chollo.solve(matB); + VERIFY_IS_APPROX(symm * matX, matB); + + const MatrixType symmLo_inverse = chollo.solve(MatrixType::Identity(rows,cols)); + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(symmLo)) / + matrix_l1_norm(symmLo_inverse); + RealScalar rcond_est = chollo.rcond(); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. 
+ VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + // test the upper mode + LLT cholup(symmUp); + VERIFY_IS_APPROX(symm, cholup.reconstructedMatrix()); + vecX = cholup.solve(vecB); + VERIFY_IS_APPROX(symm * vecX, vecB); + matX = cholup.solve(matB); + VERIFY_IS_APPROX(symm * matX, matB); + + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + const MatrixType symmUp_inverse = cholup.solve(MatrixType::Identity(rows,cols)); + rcond = (RealScalar(1) / matrix_l1_norm(symmUp)) / + matrix_l1_norm(symmUp_inverse); + rcond_est = cholup.rcond(); + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + + MatrixType neg = -symmLo; + chollo.compute(neg); + VERIFY(chollo.info()==NumericalIssue); + + VERIFY_IS_APPROX(MatrixType(chollo.matrixL().transpose().conjugate()), MatrixType(chollo.matrixU())); + VERIFY_IS_APPROX(MatrixType(chollo.matrixU().transpose().conjugate()), MatrixType(chollo.matrixL())); + VERIFY_IS_APPROX(MatrixType(cholup.matrixL().transpose().conjugate()), MatrixType(cholup.matrixU())); + VERIFY_IS_APPROX(MatrixType(cholup.matrixU().transpose().conjugate()), MatrixType(cholup.matrixL())); + + // test some special use cases of SelfCwiseBinaryOp: + MatrixType m1 = MatrixType::Random(rows,cols), m2(rows,cols); + m2 = m1; + m2 += symmLo.template selfadjointView().llt().solve(matB); + VERIFY_IS_APPROX(m2, m1 + symmLo.template selfadjointView().llt().solve(matB)); + m2 = m1; + m2 -= symmLo.template selfadjointView().llt().solve(matB); + VERIFY_IS_APPROX(m2, m1 - symmLo.template selfadjointView().llt().solve(matB)); + m2 = m1; + m2.noalias() += symmLo.template selfadjointView().llt().solve(matB); + VERIFY_IS_APPROX(m2, m1 + symmLo.template selfadjointView().llt().solve(matB)); + m2 = m1; + m2.noalias() -= symmLo.template selfadjointView().llt().solve(matB); + VERIFY_IS_APPROX(m2, m1 - symmLo.template selfadjointView().llt().solve(matB)); + } + + // LDLT + { + int sign = internal::random()%2 ? 
1 : -1; + + if(sign == -1) + { + symm = -symm; // test a negative matrix + } + + SquareMatrixType symmUp = symm.template triangularView(); + SquareMatrixType symmLo = symm.template triangularView(); + + LDLT ldltlo(symmLo); + VERIFY(ldltlo.info()==Success); + VERIFY_IS_APPROX(symm, ldltlo.reconstructedMatrix()); + vecX = ldltlo.solve(vecB); + VERIFY_IS_APPROX(symm * vecX, vecB); + matX = ldltlo.solve(matB); + VERIFY_IS_APPROX(symm * matX, matB); + + const MatrixType symmLo_inverse = ldltlo.solve(MatrixType::Identity(rows,cols)); + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(symmLo)) / + matrix_l1_norm(symmLo_inverse); + RealScalar rcond_est = ldltlo.rcond(); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + + LDLT ldltup(symmUp); + VERIFY(ldltup.info()==Success); + VERIFY_IS_APPROX(symm, ldltup.reconstructedMatrix()); + vecX = ldltup.solve(vecB); + VERIFY_IS_APPROX(symm * vecX, vecB); + matX = ldltup.solve(matB); + VERIFY_IS_APPROX(symm * matX, matB); + + // Verify that the estimated condition number is within a factor of 10 of the + // truth. 
+ const MatrixType symmUp_inverse = ldltup.solve(MatrixType::Identity(rows,cols)); + rcond = (RealScalar(1) / matrix_l1_norm(symmUp)) / + matrix_l1_norm(symmUp_inverse); + rcond_est = ldltup.rcond(); + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + VERIFY_IS_APPROX(MatrixType(ldltlo.matrixL().transpose().conjugate()), MatrixType(ldltlo.matrixU())); + VERIFY_IS_APPROX(MatrixType(ldltlo.matrixU().transpose().conjugate()), MatrixType(ldltlo.matrixL())); + VERIFY_IS_APPROX(MatrixType(ldltup.matrixL().transpose().conjugate()), MatrixType(ldltup.matrixU())); + VERIFY_IS_APPROX(MatrixType(ldltup.matrixU().transpose().conjugate()), MatrixType(ldltup.matrixL())); + + if(MatrixType::RowsAtCompileTime==Dynamic) + { + // note : each inplace permutation requires a small temporary vector (mask) + + // check inplace solve + matX = matB; + VERIFY_EVALUATION_COUNT(matX = ldltlo.solve(matX), 0); + VERIFY_IS_APPROX(matX, ldltlo.solve(matB).eval()); + + + matX = matB; + VERIFY_EVALUATION_COUNT(matX = ldltup.solve(matX), 0); + VERIFY_IS_APPROX(matX, ldltup.solve(matB).eval()); + } + + // restore + if(sign == -1) + symm = -symm; + + // check matrices coming from linear constraints with Lagrange multipliers + if(rows>=3) + { + SquareMatrixType A = symm; + Index c = internal::random(0,rows-2); + A.bottomRightCorner(c,c).setZero(); + // Make sure a solution exists: + vecX.setRandom(); + vecB = A * vecX; + vecX.setZero(); + ldltlo.compute(A); + VERIFY_IS_APPROX(A, ldltlo.reconstructedMatrix()); + vecX = ldltlo.solve(vecB); + VERIFY_IS_APPROX(A * vecX, vecB); + } + + // check non-full rank matrices + if(rows>=3) + { + Index r = internal::random(1,rows-1); + Matrix a = Matrix::Random(rows,r); + SquareMatrixType A = a * a.adjoint(); + // Make sure a solution exists: + vecX.setRandom(); + vecB = A * vecX; + vecX.setZero(); + ldltlo.compute(A); + VERIFY_IS_APPROX(A, ldltlo.reconstructedMatrix()); + vecX = ldltlo.solve(vecB); + VERIFY_IS_APPROX(A * vecX, vecB); + } + + // check 
matrices with a wide spectrum + if(rows>=3) + { + using std::pow; + using std::sqrt; + RealScalar s = (std::min)(16,std::numeric_limits::max_exponent10/8); + Matrix a = Matrix::Random(rows,rows); + Matrix d = Matrix::Random(rows); + for(Index k=0; k(-s,s)); + SquareMatrixType A = a * d.asDiagonal() * a.adjoint(); + // Make sure a solution exists: + vecX.setRandom(); + vecB = A * vecX; + vecX.setZero(); + ldltlo.compute(A); + VERIFY_IS_APPROX(A, ldltlo.reconstructedMatrix()); + vecX = ldltlo.solve(vecB); + + if(ldltlo.vectorD().real().cwiseAbs().minCoeff()>RealScalar(0)) + { + VERIFY_IS_APPROX(A * vecX,vecB); + } + else + { + RealScalar large_tol = sqrt(test_precision()); + VERIFY((A * vecX).isApprox(vecB, large_tol)); + + ++g_test_level; + VERIFY_IS_APPROX(A * vecX,vecB); + --g_test_level; + } + } + } + + // update/downdate + CALL_SUBTEST(( test_chol_update(symm) )); + CALL_SUBTEST(( test_chol_update(symm) )); +} + +template void cholesky_cplx(const MatrixType& m) +{ + // classic test + cholesky(m); + + // test mixing real/scalar types + + typedef typename MatrixType::Index Index; + + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix RealMatrixType; + typedef Matrix VectorType; + + RealMatrixType a0 = RealMatrixType::Random(rows,cols); + VectorType vecB = VectorType::Random(rows), vecX(rows); + MatrixType matB = MatrixType::Random(rows,cols), matX(rows,cols); + RealMatrixType symm = a0 * a0.adjoint(); + // let's make sure the matrix is not singular or near singular + for (int k=0; k<3; ++k) + { + RealMatrixType a1 = RealMatrixType::Random(rows,cols); + symm += a1 * a1.adjoint(); + } + + { + RealMatrixType symmLo = symm.template triangularView(); + + LLT chollo(symmLo); + VERIFY_IS_APPROX(symm, chollo.reconstructedMatrix()); + vecX = chollo.solve(vecB); + VERIFY_IS_APPROX(symm * vecX, vecB); +// matX = chollo.solve(matB); +// VERIFY_IS_APPROX(symm * matX, 
matB); + } + + // LDLT + { + int sign = internal::random()%2 ? 1 : -1; + + if(sign == -1) + { + symm = -symm; // test a negative matrix + } + + RealMatrixType symmLo = symm.template triangularView(); + + LDLT ldltlo(symmLo); + VERIFY(ldltlo.info()==Success); + VERIFY_IS_APPROX(symm, ldltlo.reconstructedMatrix()); + vecX = ldltlo.solve(vecB); + VERIFY_IS_APPROX(symm * vecX, vecB); +// matX = ldltlo.solve(matB); +// VERIFY_IS_APPROX(symm * matX, matB); + } +} + +// regression test for bug 241 +template void cholesky_bug241(const MatrixType& m) +{ + eigen_assert(m.rows() == 2 && m.cols() == 2); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + + MatrixType matA; + matA << 1, 1, 1, 1; + VectorType vecB; + vecB << 1, 1; + VectorType vecX = matA.ldlt().solve(vecB); + VERIFY_IS_APPROX(matA * vecX, vecB); +} + +// LDLT is not guaranteed to work for indefinite matrices, but happens to work fine if matrix is diagonal. +// This test checks that LDLT reports correctly that matrix is indefinite. 
+// See http://forum.kde.org/viewtopic.php?f=74&t=106942 and bug 736 +template void cholesky_definiteness(const MatrixType& m) +{ + eigen_assert(m.rows() == 2 && m.cols() == 2); + MatrixType mat; + LDLT ldlt(2); + + { + mat << 1, 0, 0, -1; + ldlt.compute(mat); + VERIFY(ldlt.info()==Success); + VERIFY(!ldlt.isNegative()); + VERIFY(!ldlt.isPositive()); + } + { + mat << 1, 2, 2, 1; + ldlt.compute(mat); + VERIFY(ldlt.info()==Success); + VERIFY(!ldlt.isNegative()); + VERIFY(!ldlt.isPositive()); + } + { + mat << 0, 0, 0, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==Success); + VERIFY(ldlt.isNegative()); + VERIFY(ldlt.isPositive()); + } + { + mat << 0, 0, 0, 1; + ldlt.compute(mat); + VERIFY(ldlt.info()==Success); + VERIFY(!ldlt.isNegative()); + VERIFY(ldlt.isPositive()); + } + { + mat << -1, 0, 0, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==Success); + VERIFY(ldlt.isNegative()); + VERIFY(!ldlt.isPositive()); + } +} + +template +void cholesky_faillure_cases() +{ + MatrixXd mat; + LDLT ldlt; + + { + mat.resize(2,2); + mat << 0, 1, 1, 0; + ldlt.compute(mat); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + VERIFY(ldlt.info()==NumericalIssue); + } +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2) + { + mat.resize(3,3); + mat << -1, -3, 3, + -3, -8.9999999999999999999, 1, + 3, 1, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } +#endif + { + mat.resize(3,3); + mat << 1, 2, 3, + 2, 4, 1, + 3, 1, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } + + { + mat.resize(8,8); + mat << 0.1, 0, -0.1, 0, 0, 0, 1, 0, + 0, 4.24667, 0, 2.00333, 0, 0, 0, 0, + -0.1, 0, 0.2, 0, -0.1, 0, 0, 0, + 0, 2.00333, 0, 8.49333, 0, 2.00333, 0, 0, + 0, 0, -0.1, 0, 0.1, 0, 0, 1, + 0, 0, 0, 2.00333, 0, 4.24667, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + 
VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } +} + +template void cholesky_verify_assert() +{ + MatrixType tmp; + + LLT llt; + VERIFY_RAISES_ASSERT(llt.matrixL()) + VERIFY_RAISES_ASSERT(llt.matrixU()) + VERIFY_RAISES_ASSERT(llt.solve(tmp)) + VERIFY_RAISES_ASSERT(llt.solveInPlace(&tmp)) + + LDLT ldlt; + VERIFY_RAISES_ASSERT(ldlt.matrixL()) + VERIFY_RAISES_ASSERT(ldlt.permutationP()) + VERIFY_RAISES_ASSERT(ldlt.vectorD()) + VERIFY_RAISES_ASSERT(ldlt.isPositive()) + VERIFY_RAISES_ASSERT(ldlt.isNegative()) + VERIFY_RAISES_ASSERT(ldlt.solve(tmp)) + VERIFY_RAISES_ASSERT(ldlt.solveInPlace(&tmp)) +} + +void test_cholesky() +{ + int s = 0; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( cholesky(Matrix()) ); + CALL_SUBTEST_3( cholesky(Matrix2d()) ); + CALL_SUBTEST_3( cholesky_bug241(Matrix2d()) ); + CALL_SUBTEST_3( cholesky_definiteness(Matrix2d()) ); + CALL_SUBTEST_4( cholesky(Matrix3f()) ); + CALL_SUBTEST_5( cholesky(Matrix4d()) ); + + s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_2( cholesky(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); + CALL_SUBTEST_6( cholesky_cplx(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } + + CALL_SUBTEST_4( cholesky_verify_assert() ); + CALL_SUBTEST_7( cholesky_verify_assert() ); + CALL_SUBTEST_8( cholesky_verify_assert() ); + CALL_SUBTEST_2( cholesky_verify_assert() ); + + // Test problem size constructors + CALL_SUBTEST_9( LLT(10) ); + CALL_SUBTEST_9( LDLT(10) ); + + CALL_SUBTEST_2( cholesky_faillure_cases() ); + + TEST_SET_BUT_UNUSED_VARIABLE(nb_temporaries) +} diff --git a/thirdparty/eigen/test/cholmod_support.cpp b/thirdparty/eigen/test/cholmod_support.cpp new file mode 100644 index 000000000..a7eda28f7 --- /dev/null +++ b/thirdparty/eigen/test/cholmod_support.cpp @@ -0,0 +1,57 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#include "sparse_solver.h" + +#include + +template void test_cholmod_T() +{ + CholmodDecomposition, Lower> g_chol_colmajor_lower; g_chol_colmajor_lower.setMode(CholmodSupernodalLLt); + CholmodDecomposition, Upper> g_chol_colmajor_upper; g_chol_colmajor_upper.setMode(CholmodSupernodalLLt); + CholmodDecomposition, Lower> g_llt_colmajor_lower; g_llt_colmajor_lower.setMode(CholmodSimplicialLLt); + CholmodDecomposition, Upper> g_llt_colmajor_upper; g_llt_colmajor_upper.setMode(CholmodSimplicialLLt); + CholmodDecomposition, Lower> g_ldlt_colmajor_lower; g_ldlt_colmajor_lower.setMode(CholmodLDLt); + CholmodDecomposition, Upper> g_ldlt_colmajor_upper; g_ldlt_colmajor_upper.setMode(CholmodLDLt); + + CholmodSupernodalLLT, Lower> chol_colmajor_lower; + CholmodSupernodalLLT, Upper> chol_colmajor_upper; + CholmodSimplicialLLT, Lower> llt_colmajor_lower; + CholmodSimplicialLLT, Upper> llt_colmajor_upper; + CholmodSimplicialLDLT, Lower> ldlt_colmajor_lower; + CholmodSimplicialLDLT, Upper> ldlt_colmajor_upper; + + check_sparse_spd_solving(g_chol_colmajor_lower); + check_sparse_spd_solving(g_chol_colmajor_upper); + check_sparse_spd_solving(g_llt_colmajor_lower); + check_sparse_spd_solving(g_llt_colmajor_upper); + check_sparse_spd_solving(g_ldlt_colmajor_lower); + check_sparse_spd_solving(g_ldlt_colmajor_upper); + + check_sparse_spd_solving(chol_colmajor_lower); + check_sparse_spd_solving(chol_colmajor_upper); + check_sparse_spd_solving(llt_colmajor_lower); + check_sparse_spd_solving(llt_colmajor_upper); + check_sparse_spd_solving(ldlt_colmajor_lower); + check_sparse_spd_solving(ldlt_colmajor_upper); + + check_sparse_spd_determinant(chol_colmajor_lower); + 
check_sparse_spd_determinant(chol_colmajor_upper); + check_sparse_spd_determinant(llt_colmajor_lower); + check_sparse_spd_determinant(llt_colmajor_upper); + check_sparse_spd_determinant(ldlt_colmajor_lower); + check_sparse_spd_determinant(ldlt_colmajor_upper); +} + +void test_cholmod_support() +{ + CALL_SUBTEST_1(test_cholmod_T()); + CALL_SUBTEST_2(test_cholmod_T >()); +} diff --git a/thirdparty/eigen/test/commainitializer.cpp b/thirdparty/eigen/test/commainitializer.cpp new file mode 100644 index 000000000..9844adbd2 --- /dev/null +++ b/thirdparty/eigen/test/commainitializer.cpp @@ -0,0 +1,106 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + + +template +void test_blocks() +{ + Matrix m_fixed; + MatrixXi m_dynamic(M1+M2, N1+N2); + + Matrix mat11; mat11.setRandom(); + Matrix mat12; mat12.setRandom(); + Matrix mat21; mat21.setRandom(); + Matrix mat22; mat22.setRandom(); + + MatrixXi matx11 = mat11, matx12 = mat12, matx21 = mat21, matx22 = mat22; + + { + VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat21, matx22).finished(), (m_dynamic << mat11, matx12, mat21, matx22).finished()); + VERIFY_IS_EQUAL((m_fixed.template topLeftCorner()), mat11); + VERIFY_IS_EQUAL((m_fixed.template topRightCorner()), mat12); + VERIFY_IS_EQUAL((m_fixed.template bottomLeftCorner()), mat21); + VERIFY_IS_EQUAL((m_fixed.template bottomRightCorner()), mat22); + VERIFY_IS_EQUAL((m_fixed << mat12, mat11, matx21, mat22).finished(), (m_dynamic << mat12, matx11, matx21, mat22).finished()); + } + + if(N1 > 0) + { + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat11, mat21, mat22)); + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat21, mat21, mat22)); + } + else + { + // allow insertion 
of zero-column blocks: + VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat11, mat11, mat21, mat21, mat22).finished(), (m_dynamic << mat12, mat22).finished()); + } + if(M1 != M2) + { + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat21, mat12, mat22)); + } +} + + +template +struct test_block_recursion +{ + static void run() + { + test_blocks<(N>>6)&3, (N>>4)&3, (N>>2)&3, N & 3>(); + test_block_recursion::run(); + } +}; + +template<> +struct test_block_recursion<-1> +{ + static void run() { } +}; + +void test_commainitializer() +{ + Matrix3d m3; + Matrix4d m4; + + VERIFY_RAISES_ASSERT( (m3 << 1, 2, 3, 4, 5, 6, 7, 8) ); + + #ifndef _MSC_VER + VERIFY_RAISES_ASSERT( (m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) ); + #endif + + double data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + Matrix3d ref = Map >(data); + + m3 = Matrix3d::Random(); + m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9; + VERIFY_IS_APPROX(m3, ref ); + + Vector3d vec[3]; + vec[0] << 1, 4, 7; + vec[1] << 2, 5, 8; + vec[2] << 3, 6, 9; + m3 = Matrix3d::Random(); + m3 << vec[0], vec[1], vec[2]; + VERIFY_IS_APPROX(m3, ref); + + vec[0] << 1, 2, 3; + vec[1] << 4, 5, 6; + vec[2] << 7, 8, 9; + m3 = Matrix3d::Random(); + m3 << vec[0].transpose(), + 4, 5, 6, + vec[2].transpose(); + VERIFY_IS_APPROX(m3, ref); + + + // recursively test all block-sizes from 0 to 3: + test_block_recursion<(1<<8) - 1>(); +} diff --git a/thirdparty/eigen/test/conjugate_gradient.cpp b/thirdparty/eigen/test/conjugate_gradient.cpp new file mode 100644 index 000000000..9622fd86d --- /dev/null +++ b/thirdparty/eigen/test/conjugate_gradient.cpp @@ -0,0 +1,34 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "sparse_solver.h" +#include + +template void test_conjugate_gradient_T() +{ + typedef SparseMatrix SparseMatrixType; + ConjugateGradient cg_colmajor_lower_diag; + ConjugateGradient cg_colmajor_upper_diag; + ConjugateGradient cg_colmajor_loup_diag; + ConjugateGradient cg_colmajor_lower_I; + ConjugateGradient cg_colmajor_upper_I; + + CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_lower_diag) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_upper_diag) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_loup_diag) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_lower_I) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_upper_I) ); +} + +void test_conjugate_gradient() +{ + CALL_SUBTEST_1(( test_conjugate_gradient_T() )); + CALL_SUBTEST_2(( test_conjugate_gradient_T, int>() )); + CALL_SUBTEST_3(( test_conjugate_gradient_T() )); +} diff --git a/thirdparty/eigen/test/conservative_resize.cpp b/thirdparty/eigen/test/conservative_resize.cpp new file mode 100644 index 000000000..498421b4c --- /dev/null +++ b/thirdparty/eigen/test/conservative_resize.cpp @@ -0,0 +1,134 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +using namespace Eigen; + +template +void run_matrix_tests() +{ + typedef Matrix MatrixType; + typedef typename MatrixType::Index Index; + + MatrixType m, n; + + // boundary cases ... 
+ m = n = MatrixType::Random(50,50); + m.conservativeResize(1,50); + VERIFY_IS_APPROX(m, n.block(0,0,1,50)); + + m = n = MatrixType::Random(50,50); + m.conservativeResize(50,1); + VERIFY_IS_APPROX(m, n.block(0,0,50,1)); + + m = n = MatrixType::Random(50,50); + m.conservativeResize(50,50); + VERIFY_IS_APPROX(m, n.block(0,0,50,50)); + + // random shrinking ... + for (int i=0; i<25; ++i) + { + const Index rows = internal::random(1,50); + const Index cols = internal::random(1,50); + m = n = MatrixType::Random(50,50); + m.conservativeResize(rows,cols); + VERIFY_IS_APPROX(m, n.block(0,0,rows,cols)); + } + + // random growing with zeroing ... + for (int i=0; i<25; ++i) + { + const Index rows = internal::random(50,75); + const Index cols = internal::random(50,75); + m = n = MatrixType::Random(50,50); + m.conservativeResizeLike(MatrixType::Zero(rows,cols)); + VERIFY_IS_APPROX(m.block(0,0,n.rows(),n.cols()), n); + VERIFY( rows<=50 || m.block(50,0,rows-50,cols).sum() == Scalar(0) ); + VERIFY( cols<=50 || m.block(0,50,rows,cols-50).sum() == Scalar(0) ); + } +} + +template +void run_vector_tests() +{ + typedef Matrix VectorType; + + VectorType m, n; + + // boundary cases ... + m = n = VectorType::Random(50); + m.conservativeResize(1); + VERIFY_IS_APPROX(m, n.segment(0,1)); + + m = n = VectorType::Random(50); + m.conservativeResize(50); + VERIFY_IS_APPROX(m, n.segment(0,50)); + + m = n = VectorType::Random(50); + m.conservativeResize(m.rows(),1); + VERIFY_IS_APPROX(m, n.segment(0,1)); + + m = n = VectorType::Random(50); + m.conservativeResize(m.rows(),50); + VERIFY_IS_APPROX(m, n.segment(0,50)); + + // random shrinking ... + for (int i=0; i<50; ++i) + { + const int size = internal::random(1,50); + m = n = VectorType::Random(50); + m.conservativeResize(size); + VERIFY_IS_APPROX(m, n.segment(0,size)); + + m = n = VectorType::Random(50); + m.conservativeResize(m.rows(), size); + VERIFY_IS_APPROX(m, n.segment(0,size)); + } + + // random growing with zeroing ... 
+ for (int i=0; i<50; ++i) + { + const int size = internal::random(50,100); + m = n = VectorType::Random(50); + m.conservativeResizeLike(VectorType::Zero(size)); + VERIFY_IS_APPROX(m.segment(0,50), n); + VERIFY( size<=50 || m.segment(50,size-50).sum() == Scalar(0) ); + + m = n = VectorType::Random(50); + m.conservativeResizeLike(Matrix::Zero(1,size)); + VERIFY_IS_APPROX(m.segment(0,50), n); + VERIFY( size<=50 || m.segment(50,size-50).sum() == Scalar(0) ); + } +} + +void test_conservative_resize() +{ + for(int i=0; i())); + CALL_SUBTEST_1((run_matrix_tests())); + CALL_SUBTEST_2((run_matrix_tests())); + CALL_SUBTEST_2((run_matrix_tests())); + CALL_SUBTEST_3((run_matrix_tests())); + CALL_SUBTEST_3((run_matrix_tests())); + CALL_SUBTEST_4((run_matrix_tests, Eigen::RowMajor>())); + CALL_SUBTEST_4((run_matrix_tests, Eigen::ColMajor>())); + CALL_SUBTEST_5((run_matrix_tests, Eigen::RowMajor>())); + CALL_SUBTEST_6((run_matrix_tests, Eigen::ColMajor>())); + + CALL_SUBTEST_1((run_vector_tests())); + CALL_SUBTEST_2((run_vector_tests())); + CALL_SUBTEST_3((run_vector_tests())); + CALL_SUBTEST_4((run_vector_tests >())); + CALL_SUBTEST_5((run_vector_tests >())); + } +} diff --git a/thirdparty/eigen/test/corners.cpp b/thirdparty/eigen/test/corners.cpp new file mode 100644 index 000000000..3c64c32a1 --- /dev/null +++ b/thirdparty/eigen/test/corners.cpp @@ -0,0 +1,118 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#define COMPARE_CORNER(A,B) \ + VERIFY_IS_EQUAL(matrix.A, matrix.B); \ + VERIFY_IS_EQUAL(const_matrix.A, const_matrix.B); + +template void corners(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + Index r = internal::random(1,rows); + Index c = internal::random(1,cols); + + MatrixType matrix = MatrixType::Random(rows,cols); + const MatrixType const_matrix = MatrixType::Random(rows,cols); + + COMPARE_CORNER(topLeftCorner(r,c), block(0,0,r,c)); + COMPARE_CORNER(topRightCorner(r,c), block(0,cols-c,r,c)); + COMPARE_CORNER(bottomLeftCorner(r,c), block(rows-r,0,r,c)); + COMPARE_CORNER(bottomRightCorner(r,c), block(rows-r,cols-c,r,c)); + + Index sr = internal::random(1,rows) - 1; + Index nr = internal::random(1,rows-sr); + Index sc = internal::random(1,cols) - 1; + Index nc = internal::random(1,cols-sc); + + COMPARE_CORNER(topRows(r), block(0,0,r,cols)); + COMPARE_CORNER(middleRows(sr,nr), block(sr,0,nr,cols)); + COMPARE_CORNER(bottomRows(r), block(rows-r,0,r,cols)); + COMPARE_CORNER(leftCols(c), block(0,0,rows,c)); + COMPARE_CORNER(middleCols(sc,nc), block(0,sc,rows,nc)); + COMPARE_CORNER(rightCols(c), block(0,cols-c,rows,c)); +} + +template void corners_fixedsize() +{ + MatrixType matrix = MatrixType::Random(); + const MatrixType const_matrix = MatrixType::Random(); + + enum { + rows = MatrixType::RowsAtCompileTime, + cols = MatrixType::ColsAtCompileTime, + r = CRows, + c = CCols, + sr = SRows, + sc = SCols + }; + + VERIFY_IS_EQUAL((matrix.template topLeftCorner()), (matrix.template block(0,0))); + VERIFY_IS_EQUAL((matrix.template topRightCorner()), (matrix.template block(0,cols-c))); + VERIFY_IS_EQUAL((matrix.template bottomLeftCorner()), (matrix.template block(rows-r,0))); + VERIFY_IS_EQUAL((matrix.template bottomRightCorner()), (matrix.template block(rows-r,cols-c))); + + VERIFY_IS_EQUAL((matrix.template topLeftCorner()), (matrix.template topLeftCorner(r,c))); + 
VERIFY_IS_EQUAL((matrix.template topRightCorner()), (matrix.template topRightCorner(r,c))); + VERIFY_IS_EQUAL((matrix.template bottomLeftCorner()), (matrix.template bottomLeftCorner(r,c))); + VERIFY_IS_EQUAL((matrix.template bottomRightCorner()), (matrix.template bottomRightCorner(r,c))); + + VERIFY_IS_EQUAL((matrix.template topLeftCorner()), (matrix.template topLeftCorner(r,c))); + VERIFY_IS_EQUAL((matrix.template topRightCorner()), (matrix.template topRightCorner(r,c))); + VERIFY_IS_EQUAL((matrix.template bottomLeftCorner()), (matrix.template bottomLeftCorner(r,c))); + VERIFY_IS_EQUAL((matrix.template bottomRightCorner()), (matrix.template bottomRightCorner(r,c))); + + VERIFY_IS_EQUAL((matrix.template topRows()), (matrix.template block(0,0))); + VERIFY_IS_EQUAL((matrix.template middleRows(sr)), (matrix.template block(sr,0))); + VERIFY_IS_EQUAL((matrix.template bottomRows()), (matrix.template block(rows-r,0))); + VERIFY_IS_EQUAL((matrix.template leftCols()), (matrix.template block(0,0))); + VERIFY_IS_EQUAL((matrix.template middleCols(sc)), (matrix.template block(0,sc))); + VERIFY_IS_EQUAL((matrix.template rightCols()), (matrix.template block(0,cols-c))); + + VERIFY_IS_EQUAL((const_matrix.template topLeftCorner()), (const_matrix.template block(0,0))); + VERIFY_IS_EQUAL((const_matrix.template topRightCorner()), (const_matrix.template block(0,cols-c))); + VERIFY_IS_EQUAL((const_matrix.template bottomLeftCorner()), (const_matrix.template block(rows-r,0))); + VERIFY_IS_EQUAL((const_matrix.template bottomRightCorner()), (const_matrix.template block(rows-r,cols-c))); + + VERIFY_IS_EQUAL((const_matrix.template topLeftCorner()), (const_matrix.template topLeftCorner(r,c))); + VERIFY_IS_EQUAL((const_matrix.template topRightCorner()), (const_matrix.template topRightCorner(r,c))); + VERIFY_IS_EQUAL((const_matrix.template bottomLeftCorner()), (const_matrix.template bottomLeftCorner(r,c))); + VERIFY_IS_EQUAL((const_matrix.template bottomRightCorner()), (const_matrix.template 
bottomRightCorner(r,c))); + + VERIFY_IS_EQUAL((const_matrix.template topLeftCorner()), (const_matrix.template topLeftCorner(r,c))); + VERIFY_IS_EQUAL((const_matrix.template topRightCorner()), (const_matrix.template topRightCorner(r,c))); + VERIFY_IS_EQUAL((const_matrix.template bottomLeftCorner()), (const_matrix.template bottomLeftCorner(r,c))); + VERIFY_IS_EQUAL((const_matrix.template bottomRightCorner()), (const_matrix.template bottomRightCorner(r,c))); + + VERIFY_IS_EQUAL((const_matrix.template topRows()), (const_matrix.template block(0,0))); + VERIFY_IS_EQUAL((const_matrix.template middleRows(sr)), (const_matrix.template block(sr,0))); + VERIFY_IS_EQUAL((const_matrix.template bottomRows()), (const_matrix.template block(rows-r,0))); + VERIFY_IS_EQUAL((const_matrix.template leftCols()), (const_matrix.template block(0,0))); + VERIFY_IS_EQUAL((const_matrix.template middleCols(sc)), (const_matrix.template block(0,sc))); + VERIFY_IS_EQUAL((const_matrix.template rightCols()), (const_matrix.template block(0,cols-c))); +} + +void test_corners() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( corners(Matrix()) ); + CALL_SUBTEST_2( corners(Matrix4d()) ); + CALL_SUBTEST_3( corners(Matrix()) ); + CALL_SUBTEST_4( corners(MatrixXcf(5, 7)) ); + CALL_SUBTEST_5( corners(MatrixXf(21, 20)) ); + + CALL_SUBTEST_1(( corners_fixedsize, 1, 1, 0, 0>() )); + CALL_SUBTEST_2(( corners_fixedsize() )); + CALL_SUBTEST_3(( corners_fixedsize,4,7,5,2>() )); + } +} diff --git a/thirdparty/eigen/test/ctorleak.cpp b/thirdparty/eigen/test/ctorleak.cpp new file mode 100644 index 000000000..c158f5e4e --- /dev/null +++ b/thirdparty/eigen/test/ctorleak.cpp @@ -0,0 +1,69 @@ +#include "main.h" + +#include // std::exception + +struct Foo +{ + static Index object_count; + static Index object_limit; + int dummy; + + Foo() + { +#ifdef EIGEN_EXCEPTIONS + // TODO: Is this the correct way to handle this? 
+ if (Foo::object_count > Foo::object_limit) { std::cout << "\nThrow!\n"; throw Foo::Fail(); } +#endif + std::cout << '+'; + ++Foo::object_count; + } + + ~Foo() + { + std::cout << '-'; + --Foo::object_count; + } + + class Fail : public std::exception {}; +}; + +Index Foo::object_count = 0; +Index Foo::object_limit = 0; + +#undef EIGEN_TEST_MAX_SIZE +#define EIGEN_TEST_MAX_SIZE 3 + +void test_ctorleak() +{ + typedef Matrix MatrixX; + typedef Matrix VectorX; + Foo::object_count = 0; + for(int i = 0; i < g_repeat; i++) { + Index rows = internal::random(2,EIGEN_TEST_MAX_SIZE), cols = internal::random(2,EIGEN_TEST_MAX_SIZE); + Foo::object_limit = internal::random(0, rows*cols - 2); + std::cout << "object_limit =" << Foo::object_limit << std::endl; +#ifdef EIGEN_EXCEPTIONS + try + { +#endif + std::cout << "\nMatrixX m(" << rows << ", " << cols << ");\n"; + MatrixX m(rows, cols); +#ifdef EIGEN_EXCEPTIONS + VERIFY(false); // not reached if exceptions are enabled + } + catch (const Foo::Fail&) { /* ignore */ } +#endif + VERIFY_IS_EQUAL(Index(0), Foo::object_count); + + { + Foo::object_limit = (rows+1)*(cols+1); + MatrixX A(rows, cols); + VERIFY_IS_EQUAL(Foo::object_count, rows*cols); + VectorX v=A.row(0); + VERIFY_IS_EQUAL(Foo::object_count, (rows+1)*cols); + v = A.col(0); + VERIFY_IS_EQUAL(Foo::object_count, rows*(cols+1)); + } + VERIFY_IS_EQUAL(Index(0), Foo::object_count); + } +} diff --git a/thirdparty/eigen/test/cuda_basic.cu b/thirdparty/eigen/test/cuda_basic.cu new file mode 100644 index 000000000..cb2e4167a --- /dev/null +++ b/thirdparty/eigen/test/cuda_basic.cu @@ -0,0 +1,173 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +// workaround issue between gcc >= 4.7 and cuda 5.5 +#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7) + #undef _GLIBCXX_ATOMIC_BUILTINS + #undef _GLIBCXX_USE_INT128 +#endif + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cuda_basic +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int + +#include +#include +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include +#endif +#include "main.h" +#include "cuda_common.h" + +// Check that dense modules can be properly parsed by nvcc +#include + +// struct Foo{ +// EIGEN_DEVICE_FUNC +// void operator()(int i, const float* mats, float* vecs) const { +// using namespace Eigen; +// // Matrix3f M(data); +// // Vector3f x(data+9); +// // Map(data+9) = M.inverse() * x; +// Matrix3f M(mats+i/16); +// Vector3f x(vecs+i*3); +// // using std::min; +// // using std::sqrt; +// Map(vecs+i*3) << x.minCoeff(), 1, 2;// / x.dot(x);//(M.inverse() * x) / x.x(); +// //x = x*2 + x.y() * x + x * x.maxCoeff() - x / x.sum(); +// } +// }; + +template +struct coeff_wise { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + T x1(in+i); + T x2(in+i+1); + T x3(in+i+2); + Map res(out+i*T::MaxSizeAtCompileTime); + + res.array() += (in[0] * x1 + x2).array() * x3.array(); + } +}; + +template +struct replicate { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + T x1(in+i); + int step = x1.size() * 4; + int stride = 3 * step; + + typedef Map > MapType; + MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2); + MapType(out+i*stride+1*step, x1.rows()*3, x1.cols()) = in[i] * x1.colwise().replicate(3); + MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3) = in[i] * x1.rowwise().replicate(3); + } +}; + +template +struct redux { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename 
T::Scalar* out) const + { + using namespace Eigen; + int N = 10; + T x1(in+i); + out[i*N+0] = x1.minCoeff(); + out[i*N+1] = x1.maxCoeff(); + out[i*N+2] = x1.sum(); + out[i*N+3] = x1.prod(); + out[i*N+4] = x1.matrix().squaredNorm(); + out[i*N+5] = x1.matrix().norm(); + out[i*N+6] = x1.colwise().sum().maxCoeff(); + out[i*N+7] = x1.rowwise().maxCoeff().sum(); + out[i*N+8] = x1.matrix().colwise().squaredNorm().sum(); + } +}; + +template +struct prod_test { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const + { + using namespace Eigen; + typedef Matrix T3; + T1 x1(in+i); + T2 x2(in+i+1); + Map res(out+i*T3::MaxSizeAtCompileTime); + res += in[i] * x1 * x2; + } +}; + +template +struct diagonal { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const + { + using namespace Eigen; + T1 x1(in+i); + Map res(out+i*T2::MaxSizeAtCompileTime); + res += x1.diagonal(); + } +}; + +template +struct eigenvalues { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + typedef Matrix Vec; + T M(in+i); + Map res(out+i*Vec::MaxSizeAtCompileTime); + T A = M*M.adjoint(); + SelfAdjointEigenSolver eig; + eig.computeDirect(M); + res = eig.eigenvalues(); + } +}; + +void test_cuda_basic() +{ + ei_test_init_cuda(); + + int nthreads = 100; + Eigen::VectorXf in, out; + + #ifndef __CUDA_ARCH__ + int data_size = nthreads * 512; + in.setRandom(data_size); + out.setRandom(data_size); + #endif + + CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(replicate(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(replicate(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(redux(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(redux(), 
nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(prod_test(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(prod_test(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(diagonal(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(diagonal(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues(), nthreads, in, out) ); + +} diff --git a/thirdparty/eigen/test/cuda_common.h b/thirdparty/eigen/test/cuda_common.h new file mode 100644 index 000000000..9737693ac --- /dev/null +++ b/thirdparty/eigen/test/cuda_common.h @@ -0,0 +1,101 @@ + +#ifndef EIGEN_TEST_CUDA_COMMON_H +#define EIGEN_TEST_CUDA_COMMON_H + +#include +#include +#include +#include + +#ifndef __CUDACC__ +dim3 threadIdx, blockDim, blockIdx; +#endif + +template +void run_on_cpu(const Kernel& ker, int n, const Input& in, Output& out) +{ + for(int i=0; i +__global__ +void run_on_cuda_meta_kernel(const Kernel ker, int n, const Input* in, Output* out) +{ + int i = threadIdx.x + blockIdx.x*blockDim.x; + if(i +void run_on_cuda(const Kernel& ker, int n, const Input& in, Output& out) +{ + typename Input::Scalar* d_in; + typename Output::Scalar* d_out; + std::ptrdiff_t in_bytes = in.size() * sizeof(typename Input::Scalar); + std::ptrdiff_t out_bytes = out.size() * sizeof(typename Output::Scalar); + + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_out, out.data(), out_bytes, cudaMemcpyHostToDevice); + + // Simple and non-optimal 1D mapping assuming n is not too large + // That's only for unit testing! 
+ dim3 Blocks(128); + dim3 Grids( (n+int(Blocks.x)-1)/int(Blocks.x) ); + + cudaThreadSynchronize(); + run_on_cuda_meta_kernel<<>>(ker, n, d_in, d_out); + cudaThreadSynchronize(); + + // check inputs have not been modified + cudaMemcpy(const_cast(in.data()), d_in, in_bytes, cudaMemcpyDeviceToHost); + cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost); + + cudaFree(d_in); + cudaFree(d_out); +} + + +template +void run_and_compare_to_cuda(const Kernel& ker, int n, const Input& in, Output& out) +{ + Input in_ref, in_cuda; + Output out_ref, out_cuda; + #ifndef __CUDA_ARCH__ + in_ref = in_cuda = in; + out_ref = out_cuda = out; + #endif + run_on_cpu (ker, n, in_ref, out_ref); + run_on_cuda(ker, n, in_cuda, out_cuda); + #ifndef __CUDA_ARCH__ + VERIFY_IS_APPROX(in_ref, in_cuda); + VERIFY_IS_APPROX(out_ref, out_cuda); + #endif +} + + +void ei_test_init_cuda() +{ + int device = 0; + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, device); + std::cout << "CUDA device info:\n"; + std::cout << " name: " << deviceProp.name << "\n"; + std::cout << " capability: " << deviceProp.major << "." 
<< deviceProp.minor << "\n"; + std::cout << " multiProcessorCount: " << deviceProp.multiProcessorCount << "\n"; + std::cout << " maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor << "\n"; + std::cout << " warpSize: " << deviceProp.warpSize << "\n"; + std::cout << " regsPerBlock: " << deviceProp.regsPerBlock << "\n"; + std::cout << " concurrentKernels: " << deviceProp.concurrentKernels << "\n"; + std::cout << " clockRate: " << deviceProp.clockRate << "\n"; + std::cout << " canMapHostMemory: " << deviceProp.canMapHostMemory << "\n"; + std::cout << " computeMode: " << deviceProp.computeMode << "\n"; +} + +#endif // EIGEN_TEST_CUDA_COMMON_H diff --git a/thirdparty/eigen/test/denseLM.cpp b/thirdparty/eigen/test/denseLM.cpp new file mode 100644 index 000000000..0aa736ea3 --- /dev/null +++ b/thirdparty/eigen/test/denseLM.cpp @@ -0,0 +1,190 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include +#include +#include + +#include "main.h" +#include +using namespace std; +using namespace Eigen; + +template +struct DenseLM : DenseFunctor +{ + typedef DenseFunctor Base; + typedef typename Base::JacobianType JacobianType; + typedef Matrix VectorType; + + DenseLM(int n, int m) : DenseFunctor(n,m) + { } + + VectorType model(const VectorType& uv, VectorType& x) + { + VectorType y; // Should change to use expression template + int m = Base::values(); + int n = Base::inputs(); + eigen_assert(uv.size()%2 == 0); + eigen_assert(uv.size() == n); + eigen_assert(x.size() == m); + y.setZero(m); + int half = n/2; + VectorBlock u(uv, 0, half); + VectorBlock v(uv, half, half); + for (int j = 0; j < m; j++) + { + for (int i = 0; i < half; i++) + y(j) += u(i)*std::exp(-(x(j)-i)*(x(j)-i)/(v(i)*v(i))); + } + return y; + + } + void initPoints(VectorType& uv_ref, VectorType& x) + { + m_x = x; + m_y = this->model(uv_ref, x); + } + + int operator()(const VectorType& uv, VectorType& fvec) + { + + int m = Base::values(); + int n = Base::inputs(); + eigen_assert(uv.size()%2 == 0); + eigen_assert(uv.size() == n); + eigen_assert(fvec.size() == m); + int half = n/2; + VectorBlock u(uv, 0, half); + VectorBlock v(uv, half, half); + for (int j = 0; j < m; j++) + { + fvec(j) = m_y(j); + for (int i = 0; i < half; i++) + { + fvec(j) -= u(i) *std::exp(-(m_x(j)-i)*(m_x(j)-i)/(v(i)*v(i))); + } + } + + return 0; + } + int df(const VectorType& uv, JacobianType& fjac) + { + int m = Base::values(); + int n = Base::inputs(); + eigen_assert(n == uv.size()); + eigen_assert(fjac.rows() == m); + eigen_assert(fjac.cols() == n); + int half = n/2; + VectorBlock u(uv, 0, half); + VectorBlock v(uv, half, half); + for (int j = 0; j < m; j++) + { + for (int i = 0; i < half; i++) + { + fjac.coeffRef(j,i) = -std::exp(-(m_x(j)-i)*(m_x(j)-i)/(v(i)*v(i))); + fjac.coeffRef(j,i+half) = -2.*u(i)*(m_x(j)-i)*(m_x(j)-i)/(std::pow(v(i),3)) * std::exp(-(m_x(j)-i)*(m_x(j)-i)/(v(i)*v(i))); + } + } + return 0; + } + 
VectorType m_x, m_y; //Data Points +}; + +template +int test_minimizeLM(FunctorType& functor, VectorType& uv) +{ + LevenbergMarquardt lm(functor); + LevenbergMarquardtSpace::Status info; + + info = lm.minimize(uv); + + VERIFY_IS_EQUAL(info, 1); + //FIXME Check other parameters + return info; +} + +template +int test_lmder(FunctorType& functor, VectorType& uv) +{ + typedef typename VectorType::Scalar Scalar; + LevenbergMarquardtSpace::Status info; + LevenbergMarquardt lm(functor); + info = lm.lmder1(uv); + + VERIFY_IS_EQUAL(info, 1); + //FIXME Check other parameters + return info; +} + +template +int test_minimizeSteps(FunctorType& functor, VectorType& uv) +{ + LevenbergMarquardtSpace::Status info; + LevenbergMarquardt lm(functor); + info = lm.minimizeInit(uv); + if (info==LevenbergMarquardtSpace::ImproperInputParameters) + return info; + do + { + info = lm.minimizeOneStep(uv); + } while (info==LevenbergMarquardtSpace::Running); + + VERIFY_IS_EQUAL(info, 1); + //FIXME Check other parameters + return info; +} + +template +void test_denseLM_T() +{ + typedef Matrix VectorType; + + int inputs = 10; + int values = 1000; + DenseLM dense_gaussian(inputs, values); + VectorType uv(inputs),uv_ref(inputs); + VectorType x(values); + + // Generate the reference solution + uv_ref << -2, 1, 4 ,8, 6, 1.8, 1.2, 1.1, 1.9 , 3; + + //Generate the reference data points + x.setRandom(); + x = 10*x; + x.array() += 10; + dense_gaussian.initPoints(uv_ref, x); + + // Generate the initial parameters + VectorBlock u(uv, 0, inputs/2); + VectorBlock v(uv, inputs/2, inputs/2); + + // Solve the optimization problem + + //Solve in one go + u.setOnes(); v.setOnes(); + test_minimizeLM(dense_gaussian, uv); + + //Solve until the machine precision + u.setOnes(); v.setOnes(); + test_lmder(dense_gaussian, uv); + + // Solve step by step + v.setOnes(); u.setOnes(); + test_minimizeSteps(dense_gaussian, uv); + +} + +void test_denseLM() +{ + CALL_SUBTEST_2(test_denseLM_T()); + + // 
CALL_SUBTEST_2(test_sparseLM_T()); +} diff --git a/thirdparty/eigen/test/dense_storage.cpp b/thirdparty/eigen/test/dense_storage.cpp new file mode 100644 index 000000000..e63712b1a --- /dev/null +++ b/thirdparty/eigen/test/dense_storage.cpp @@ -0,0 +1,76 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +template +void dense_storage_copy() +{ + static const int Size = ((Rows==Dynamic || Cols==Dynamic) ? Dynamic : Rows*Cols); + typedef DenseStorage DenseStorageType; + + const int rows = (Rows==Dynamic) ? 4 : Rows; + const int cols = (Cols==Dynamic) ? 3 : Cols; + const int size = rows*cols; + DenseStorageType reference(size, rows, cols); + T* raw_reference = reference.data(); + for (int i=0; i(i); + + DenseStorageType copied_reference(reference); + const T* raw_copied_reference = copied_reference.data(); + for (int i=0; i +void dense_storage_assignment() +{ + static const int Size = ((Rows==Dynamic || Cols==Dynamic) ? Dynamic : Rows*Cols); + typedef DenseStorage DenseStorageType; + + const int rows = (Rows==Dynamic) ? 4 : Rows; + const int cols = (Cols==Dynamic) ? 
3 : Cols; + const int size = rows*cols; + DenseStorageType reference(size, rows, cols); + T* raw_reference = reference.data(); + for (int i=0; i(i); + + DenseStorageType copied_reference; + copied_reference = reference; + const T* raw_copied_reference = copied_reference.data(); + for (int i=0; i(); + dense_storage_copy(); + dense_storage_copy(); + dense_storage_copy(); + + dense_storage_copy(); + dense_storage_copy(); + dense_storage_copy(); + dense_storage_copy(); + + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); + + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); +} diff --git a/thirdparty/eigen/test/determinant.cpp b/thirdparty/eigen/test/determinant.cpp new file mode 100644 index 000000000..758f3afbb --- /dev/null +++ b/thirdparty/eigen/test/determinant.cpp @@ -0,0 +1,67 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +template void determinant(const MatrixType& m) +{ + /* this test covers the following files: + Determinant.h + */ + typedef typename MatrixType::Index Index; + Index size = m.rows(); + + MatrixType m1(size, size), m2(size, size); + m1.setRandom(); + m2.setRandom(); + typedef typename MatrixType::Scalar Scalar; + Scalar x = internal::random(); + VERIFY_IS_APPROX(MatrixType::Identity(size, size).determinant(), Scalar(1)); + VERIFY_IS_APPROX((m1*m2).eval().determinant(), m1.determinant() * m2.determinant()); + if(size==1) return; + Index i = internal::random(0, size-1); + Index j; + do { + j = internal::random(0, size-1); + } while(j==i); + m2 = m1; + m2.row(i).swap(m2.row(j)); + VERIFY_IS_APPROX(m2.determinant(), -m1.determinant()); + m2 = m1; + m2.col(i).swap(m2.col(j)); + VERIFY_IS_APPROX(m2.determinant(), -m1.determinant()); + VERIFY_IS_APPROX(m2.determinant(), m2.transpose().determinant()); + VERIFY_IS_APPROX(numext::conj(m2.determinant()), m2.adjoint().determinant()); + m2 = m1; + m2.row(i) += x*m2.row(j); + VERIFY_IS_APPROX(m2.determinant(), m1.determinant()); + m2 = m1; + m2.row(i) *= x; + VERIFY_IS_APPROX(m2.determinant(), m1.determinant() * x); + + // check empty matrix + VERIFY_IS_APPROX(m2.block(0,0,0,0).determinant(), Scalar(1)); +} + +void test_determinant() +{ + for(int i = 0; i < g_repeat; i++) { + int s = 0; + CALL_SUBTEST_1( determinant(Matrix()) ); + CALL_SUBTEST_2( determinant(Matrix()) ); + CALL_SUBTEST_3( determinant(Matrix()) ); + CALL_SUBTEST_4( determinant(Matrix()) ); + CALL_SUBTEST_5( determinant(Matrix, 10, 10>()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_6( determinant(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } +} diff --git a/thirdparty/eigen/test/diagonal.cpp b/thirdparty/eigen/test/diagonal.cpp new file mode 100644 index 000000000..ee00cad55 --- /dev/null +++ b/thirdparty/eigen/test/diagonal.cpp @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void diagonal(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols); + + Scalar s1 = internal::random(); + + //check diagonal() + VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal()); + m2.diagonal() = 2 * m1.diagonal(); + m2.diagonal()[0] *= 3; + + if (rows>2) + { + enum { + N1 = MatrixType::RowsAtCompileTime>2 ? 2 : 0, + N2 = MatrixType::RowsAtCompileTime>1 ? -1 : 0 + }; + + // check sub/super diagonal + if(MatrixType::SizeAtCompileTime!=Dynamic) + { + VERIFY(m1.template diagonal().RowsAtCompileTime == m1.diagonal(N1).size()); + VERIFY(m1.template diagonal().RowsAtCompileTime == m1.diagonal(N2).size()); + } + + m2.template diagonal() = 2 * m1.template diagonal(); + VERIFY_IS_APPROX(m2.template diagonal(), static_cast(2) * m1.diagonal(N1)); + m2.template diagonal()[0] *= 3; + VERIFY_IS_APPROX(m2.template diagonal()[0], static_cast(6) * m1.template diagonal()[0]); + + + m2.template diagonal() = 2 * m1.template diagonal(); + m2.template diagonal()[0] *= 3; + VERIFY_IS_APPROX(m2.template diagonal()[0], static_cast(6) * m1.template diagonal()[0]); + + m2.diagonal(N1) = 2 * m1.diagonal(N1); + VERIFY_IS_APPROX(m2.template diagonal(), static_cast(2) * m1.diagonal(N1)); + m2.diagonal(N1)[0] *= 3; + VERIFY_IS_APPROX(m2.diagonal(N1)[0], static_cast(6) * m1.diagonal(N1)[0]); + + m2.diagonal(N2) = 2 * m1.diagonal(N2); + VERIFY_IS_APPROX(m2.template diagonal(), static_cast(2) * m1.diagonal(N2)); + m2.diagonal(N2)[0] *= 3; + 
VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast(6) * m1.diagonal(N2)[0]); + + m2.diagonal(N2).x() = s1; + VERIFY_IS_APPROX(m2.diagonal(N2).x(), s1); + m2.diagonal(N2).coeffRef(0) = Scalar(2)*s1; + VERIFY_IS_APPROX(m2.diagonal(N2).coeff(0), Scalar(2)*s1); + } +} + +void test_diagonal() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( diagonal(Matrix()) ); + CALL_SUBTEST_1( diagonal(Matrix()) ); + CALL_SUBTEST_1( diagonal(Matrix()) ); + CALL_SUBTEST_2( diagonal(Matrix4d()) ); + CALL_SUBTEST_2( diagonal(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( diagonal(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( diagonal(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_1( diagonal(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_1( diagonal(Matrix(3, 4)) ); + } +} diff --git a/thirdparty/eigen/test/diagonalmatrices.cpp b/thirdparty/eigen/test/diagonalmatrices.cpp new file mode 100644 index 000000000..cd6dc8cf0 --- /dev/null +++ b/thirdparty/eigen/test/diagonalmatrices.cpp @@ -0,0 +1,129 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +using namespace std; +template void diagonalmatrices(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; + typedef Matrix VectorType; + typedef Matrix RowVectorType; + typedef Matrix SquareMatrixType; + typedef Matrix DynMatrixType; + typedef DiagonalMatrix LeftDiagonalMatrix; + typedef DiagonalMatrix RightDiagonalMatrix; + typedef Matrix BigMatrix; + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols); + VectorType v1 = VectorType::Random(rows), + v2 = VectorType::Random(rows); + RowVectorType rv1 = RowVectorType::Random(cols), + rv2 = RowVectorType::Random(cols); + LeftDiagonalMatrix ldm1(v1), ldm2(v2); + RightDiagonalMatrix rdm1(rv1), rdm2(rv2); + + Scalar s1 = internal::random(); + + SquareMatrixType sq_m1 (v1.asDiagonal()); + VERIFY_IS_APPROX(sq_m1, v1.asDiagonal().toDenseMatrix()); + sq_m1 = v1.asDiagonal(); + VERIFY_IS_APPROX(sq_m1, v1.asDiagonal().toDenseMatrix()); + SquareMatrixType sq_m2 = v1.asDiagonal(); + VERIFY_IS_APPROX(sq_m1, sq_m2); + + ldm1 = v1.asDiagonal(); + LeftDiagonalMatrix ldm3(v1); + VERIFY_IS_APPROX(ldm1.diagonal(), ldm3.diagonal()); + LeftDiagonalMatrix ldm4 = v1.asDiagonal(); + VERIFY_IS_APPROX(ldm1.diagonal(), ldm4.diagonal()); + + sq_m1.block(0,0,rows,rows) = ldm1; + VERIFY_IS_APPROX(sq_m1, ldm1.toDenseMatrix()); + sq_m1.transpose() = ldm1; + VERIFY_IS_APPROX(sq_m1, ldm1.toDenseMatrix()); + + Index i = internal::random(0, rows-1); + Index j = internal::random(0, cols-1); + + VERIFY_IS_APPROX( ((ldm1 * m1)(i,j)) , ldm1.diagonal()(i) * m1(i,j) ); + VERIFY_IS_APPROX( ((ldm1 * (m1+m2))(i,j)) , ldm1.diagonal()(i) * (m1+m2)(i,j) ); + VERIFY_IS_APPROX( ((m1 * rdm1)(i,j)) , rdm1.diagonal()(j) * m1(i,j) ); + VERIFY_IS_APPROX( ((v1.asDiagonal() * m1)(i,j)) , v1(i) * m1(i,j) ); + 
VERIFY_IS_APPROX( ((m1 * rv1.asDiagonal())(i,j)) , rv1(j) * m1(i,j) ); + VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * m1)(i,j)) , (v1+v2)(i) * m1(i,j) ); + VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * (m1+m2))(i,j)) , (v1+v2)(i) * (m1+m2)(i,j) ); + VERIFY_IS_APPROX( ((m1 * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * m1(i,j) ); + VERIFY_IS_APPROX( (((m1+m2) * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * (m1+m2)(i,j) ); + + if(rows>1) + { + DynMatrixType tmp = m1.topRows(rows/2), res; + VERIFY_IS_APPROX( (res = m1.topRows(rows/2) * rv1.asDiagonal()), tmp * rv1.asDiagonal() ); + VERIFY_IS_APPROX( (res = v1.head(rows/2).asDiagonal()*m1.topRows(rows/2)), v1.head(rows/2).asDiagonal()*tmp ); + } + + BigMatrix big; + big.setZero(2*rows, 2*cols); + + big.block(i,j,rows,cols) = m1; + big.block(i,j,rows,cols) = v1.asDiagonal() * big.block(i,j,rows,cols); + + VERIFY_IS_APPROX((big.block(i,j,rows,cols)) , v1.asDiagonal() * m1 ); + + big.block(i,j,rows,cols) = m1; + big.block(i,j,rows,cols) = big.block(i,j,rows,cols) * rv1.asDiagonal(); + VERIFY_IS_APPROX((big.block(i,j,rows,cols)) , m1 * rv1.asDiagonal() ); + + + // scalar multiple + VERIFY_IS_APPROX(LeftDiagonalMatrix(ldm1*s1).diagonal(), ldm1.diagonal() * s1); + VERIFY_IS_APPROX(LeftDiagonalMatrix(s1*ldm1).diagonal(), s1 * ldm1.diagonal()); + + VERIFY_IS_APPROX(m1 * (rdm1 * s1), (m1 * rdm1) * s1); + VERIFY_IS_APPROX(m1 * (s1 * rdm1), (m1 * rdm1) * s1); + + // Diagonal to dense + sq_m1.setRandom(); + sq_m2 = sq_m1; + VERIFY_IS_APPROX( (sq_m1 += (s1*v1).asDiagonal()), sq_m2 += (s1*v1).asDiagonal().toDenseMatrix() ); + VERIFY_IS_APPROX( (sq_m1 -= (s1*v1).asDiagonal()), sq_m2 -= (s1*v1).asDiagonal().toDenseMatrix() ); + VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() ); +} + +template +void bug987() +{ + Matrix3Xd points = Matrix3Xd::Random(3, 3); + Vector2d diag = Vector2d::Random(); + Matrix2Xd tmp1 = points.topRows<2>(), res1, res2; + VERIFY_IS_APPROX( res1 = diag.asDiagonal() * 
points.topRows<2>(), res2 = diag.asDiagonal() * tmp1 ); + Matrix2d tmp2 = points.topLeftCorner<2,2>(); + VERIFY_IS_APPROX(( res1 = points.topLeftCorner<2,2>()*diag.asDiagonal()) , res2 = tmp2*diag.asDiagonal() ); +} + +void test_diagonalmatrices() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( diagonalmatrices(Matrix()) ); + CALL_SUBTEST_2( diagonalmatrices(Matrix3f()) ); + CALL_SUBTEST_3( diagonalmatrices(Matrix()) ); + CALL_SUBTEST_4( diagonalmatrices(Matrix4d()) ); + CALL_SUBTEST_5( diagonalmatrices(Matrix()) ); + CALL_SUBTEST_6( diagonalmatrices(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( diagonalmatrices(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( diagonalmatrices(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_9( diagonalmatrices(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + CALL_SUBTEST_10( bug987<0>() ); +} diff --git a/thirdparty/eigen/test/dontalign.cpp b/thirdparty/eigen/test/dontalign.cpp new file mode 100644 index 000000000..4643cfed6 --- /dev/null +++ b/thirdparty/eigen/test/dontalign.cpp @@ -0,0 +1,63 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#if defined EIGEN_TEST_PART_1 || defined EIGEN_TEST_PART_2 || defined EIGEN_TEST_PART_3 || defined EIGEN_TEST_PART_4 +#define EIGEN_DONT_ALIGN +#elif defined EIGEN_TEST_PART_5 || defined EIGEN_TEST_PART_6 || defined EIGEN_TEST_PART_7 || defined EIGEN_TEST_PART_8 +#define EIGEN_DONT_ALIGN_STATICALLY +#endif + +#include "main.h" +#include + +template +void dontalign(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix SquareMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType a = MatrixType::Random(rows,cols); + SquareMatrixType square = SquareMatrixType::Random(rows,rows); + VectorType v = VectorType::Random(rows); + + VERIFY_IS_APPROX(v, square * square.colPivHouseholderQr().solve(v)); + square = square.inverse().eval(); + a = square * a; + square = square*square; + v = square * v; + v = a.adjoint() * v; + VERIFY(square.determinant() != Scalar(0)); + + // bug 219: MapAligned() was giving an assert with EIGEN_DONT_ALIGN, because Map Flags were miscomputed + Scalar* array = internal::aligned_new(rows); + v = VectorType::MapAligned(array, rows); + internal::aligned_delete(array, rows); +} + +void test_dontalign() +{ +#if defined EIGEN_TEST_PART_1 || defined EIGEN_TEST_PART_5 + dontalign(Matrix3d()); + dontalign(Matrix4f()); +#elif defined EIGEN_TEST_PART_2 || defined EIGEN_TEST_PART_6 + dontalign(Matrix3cd()); + dontalign(Matrix4cf()); +#elif defined EIGEN_TEST_PART_3 || defined EIGEN_TEST_PART_7 + dontalign(Matrix()); + dontalign(Matrix, 32, 32>()); +#elif defined EIGEN_TEST_PART_4 || defined EIGEN_TEST_PART_8 + dontalign(MatrixXd(32, 32)); + dontalign(MatrixXcf(32, 32)); +#endif +} diff --git a/thirdparty/eigen/test/dynalloc.cpp b/thirdparty/eigen/test/dynalloc.cpp new file mode 100644 index 000000000..f1cc70bee --- /dev/null +++ b/thirdparty/eigen/test/dynalloc.cpp @@ -0,0 +1,175 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#if EIGEN_MAX_ALIGN_BYTES>0 +#define ALIGNMENT EIGEN_MAX_ALIGN_BYTES +#else +#define ALIGNMENT 1 +#endif + +typedef Matrix Vector8f; + +void check_handmade_aligned_malloc() +{ + for(int i = 1; i < 1000; i++) + { + char *p = (char*)internal::handmade_aligned_malloc(i); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + internal::handmade_aligned_free(p); + } +} + +void check_aligned_malloc() +{ + for(int i = ALIGNMENT; i < 1000; i++) + { + char *p = (char*)internal::aligned_malloc(i); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + internal::aligned_free(p); + } +} + +void check_aligned_new() +{ + for(int i = ALIGNMENT; i < 1000; i++) + { + float *p = internal::aligned_new(i); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + internal::aligned_delete(p,i); + } +} + +void check_aligned_stack_alloc() +{ + for(int i = ALIGNMENT; i < 400; i++) + { + ei_declare_aligned_stack_constructed_variable(float,p,i,0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); + // if the buffer is wrongly allocated this will give a bad write --> check with valgrind + for(int j = 0; j < i; j++) p[j]=0; + } +} + + +// test compilation with both a struct and a class... 
+struct MyStruct +{ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + char dummychar; + Vector8f avec; +}; + +class MyClassA +{ + public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + char dummychar; + Vector8f avec; +}; + +template void check_dynaligned() +{ + // TODO have to be updated once we support multiple alignment values + if(T::SizeAtCompileTime % ALIGNMENT == 0) + { + T* obj = new T; + VERIFY(T::NeedsToAlign==1); + VERIFY(internal::UIntPtr(obj)%ALIGNMENT==0); + delete obj; + } +} + +template void check_custom_new_delete() +{ + { + T* t = new T; + delete t; + } + + { + std::size_t N = internal::random(1,10); + T* t = new T[N]; + delete[] t; + } + +#if EIGEN_MAX_ALIGN_BYTES>0 + { + T* t = static_cast((T::operator new)(sizeof(T))); + (T::operator delete)(t, sizeof(T)); + } + + { + T* t = static_cast((T::operator new)(sizeof(T))); + (T::operator delete)(t); + } +#endif +} + +void test_dynalloc() +{ + // low level dynamic memory allocation + CALL_SUBTEST(check_handmade_aligned_malloc()); + CALL_SUBTEST(check_aligned_malloc()); + CALL_SUBTEST(check_aligned_new()); + CALL_SUBTEST(check_aligned_stack_alloc()); + + for (int i=0; i() ); + CALL_SUBTEST( check_custom_new_delete() ); + CALL_SUBTEST( check_custom_new_delete() ); + CALL_SUBTEST( check_custom_new_delete() ); + } + + // check static allocation, who knows ? 
+ #if EIGEN_MAX_STATIC_ALIGN_BYTES + for (int i=0; i() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); + } + + { + MyStruct foo0; VERIFY(internal::UIntPtr(foo0.avec.data())%ALIGNMENT==0); + MyClassA fooA; VERIFY(internal::UIntPtr(fooA.avec.data())%ALIGNMENT==0); + } + + // dynamic allocation, single object + for (int i=0; iavec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA(); VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0); + delete foo0; + delete fooA; + } + + // dynamic allocation, array + const int N = 10; + for (int i=0; iavec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA[N]; VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0); + delete[] foo0; + delete[] fooA; + } + #endif + +} diff --git a/thirdparty/eigen/test/eigen2support.cpp b/thirdparty/eigen/test/eigen2support.cpp new file mode 100644 index 000000000..ad1d98091 --- /dev/null +++ b/thirdparty/eigen/test/eigen2support.cpp @@ -0,0 +1,66 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN2_SUPPORT + +#include "main.h" + +template void eigen2support(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m3(rows, cols); + + Scalar s1 = internal::random(), + s2 = internal::random(); + + // scalar addition + VERIFY_IS_APPROX(m1.cwise() + s1, s1 + m1.cwise()); + VERIFY_IS_APPROX(m1.cwise() + s1, MatrixType::Constant(rows,cols,s1) + m1); + VERIFY_IS_APPROX((m1*Scalar(2)).cwise() - s2, (m1+m1) - MatrixType::Constant(rows,cols,s2) ); + m3 = m1; + m3.cwise() += s2; + VERIFY_IS_APPROX(m3, m1.cwise() + s2); + m3 = m1; + m3.cwise() -= s1; + VERIFY_IS_APPROX(m3, m1.cwise() - s1); + + VERIFY_IS_EQUAL((m1.corner(TopLeft,1,1)), (m1.block(0,0,1,1))); + VERIFY_IS_EQUAL((m1.template corner<1,1>(TopLeft)), (m1.template block<1,1>(0,0))); + VERIFY_IS_EQUAL((m1.col(0).start(1)), (m1.col(0).segment(0,1))); + VERIFY_IS_EQUAL((m1.col(0).template start<1>()), (m1.col(0).segment(0,1))); + VERIFY_IS_EQUAL((m1.col(0).end(1)), (m1.col(0).segment(rows-1,1))); + VERIFY_IS_EQUAL((m1.col(0).template end<1>()), (m1.col(0).segment(rows-1,1))); + + using std::cos; + using numext::real; + using numext::abs2; + VERIFY_IS_EQUAL(ei_cos(s1), cos(s1)); + VERIFY_IS_EQUAL(ei_real(s1), real(s1)); + VERIFY_IS_EQUAL(ei_abs2(s1), abs2(s1)); + + m1.minor(0,0); +} + +void test_eigen2support() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( eigen2support(Matrix()) ); + CALL_SUBTEST_2( eigen2support(MatrixXd(1,1)) ); + CALL_SUBTEST_4( eigen2support(Matrix3f()) ); + CALL_SUBTEST_5( eigen2support(Matrix4d()) ); + CALL_SUBTEST_2( eigen2support(MatrixXf(200,200)) ); + CALL_SUBTEST_6( eigen2support(MatrixXcd(100,100)) ); + } +} diff --git a/thirdparty/eigen/test/eigensolver_complex.cpp b/thirdparty/eigen/test/eigensolver_complex.cpp new file mode 100644 index 000000000..8e2bb9ef0 --- /dev/null +++ 
b/thirdparty/eigen/test/eigensolver_complex.cpp @@ -0,0 +1,168 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2010 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +template bool find_pivot(typename MatrixType::Scalar tol, MatrixType &diffs, Index col=0) +{ + bool match = diffs.diagonal().sum() <= tol; + if(match || col==diffs.cols()) + { + return match; + } + else + { + Index n = diffs.cols(); + std::vector > transpositions; + for(Index i=col; i tol) + break; + + best_index += col; + + diffs.row(col).swap(diffs.row(best_index)); + if(find_pivot(tol,diffs,col+1)) return true; + diffs.row(col).swap(diffs.row(best_index)); + + // move current pivot to the end + diffs.row(n-(i-col)-1).swap(diffs.row(best_index)); + transpositions.push_back(std::pair(n-(i-col)-1,best_index)); + } + // restore + for(Index k=transpositions.size()-1; k>=0; --k) + diffs.row(transpositions[k].first).swap(diffs.row(transpositions[k].second)); + } + return false; +} + +/* Check that two column vectors are approximately equal upto permutations. + * Initially, this method checked that the k-th power sums are equal for all k = 1, ..., vec1.rows(), + * however this strategy is numerically inacurate because of numerical cancellation issues. 
+ */ +template +void verify_is_approx_upto_permutation(const VectorType& vec1, const VectorType& vec2) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + VERIFY(vec1.cols() == 1); + VERIFY(vec2.cols() == 1); + VERIFY(vec1.rows() == vec2.rows()); + + Index n = vec1.rows(); + RealScalar tol = test_precision()*test_precision()*numext::maxi(vec1.squaredNorm(),vec2.squaredNorm()); + Matrix diffs = (vec1.rowwise().replicate(n) - vec2.rowwise().replicate(n).transpose()).cwiseAbs2(); + + VERIFY( find_pivot(tol, diffs) ); +} + + +template void eigensolver(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + /* this test covers the following files: + ComplexEigenSolver.h, and indirectly ComplexSchur.h + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + MatrixType a = MatrixType::Random(rows,cols); + MatrixType symmA = a.adjoint() * a; + + ComplexEigenSolver ei0(symmA); + VERIFY_IS_EQUAL(ei0.info(), Success); + VERIFY_IS_APPROX(symmA * ei0.eigenvectors(), ei0.eigenvectors() * ei0.eigenvalues().asDiagonal()); + + ComplexEigenSolver ei1(a); + VERIFY_IS_EQUAL(ei1.info(), Success); + VERIFY_IS_APPROX(a * ei1.eigenvectors(), ei1.eigenvectors() * ei1.eigenvalues().asDiagonal()); + // Note: If MatrixType is real then a.eigenvalues() uses EigenSolver and thus + // another algorithm so results may differ slightly + verify_is_approx_upto_permutation(a.eigenvalues(), ei1.eigenvalues()); + + ComplexEigenSolver ei2; + ei2.setMaxIterations(ComplexSchur::m_maxIterationsPerRow * rows).compute(a); + VERIFY_IS_EQUAL(ei2.info(), Success); + VERIFY_IS_EQUAL(ei2.eigenvectors(), ei1.eigenvectors()); + VERIFY_IS_EQUAL(ei2.eigenvalues(), ei1.eigenvalues()); + if (rows > 2) { + ei2.setMaxIterations(1).compute(a); + VERIFY_IS_EQUAL(ei2.info(), NoConvergence); + VERIFY_IS_EQUAL(ei2.getMaxIterations(), 1); + } + + ComplexEigenSolver 
eiNoEivecs(a, false); + VERIFY_IS_EQUAL(eiNoEivecs.info(), Success); + VERIFY_IS_APPROX(ei1.eigenvalues(), eiNoEivecs.eigenvalues()); + + // Regression test for issue #66 + MatrixType z = MatrixType::Zero(rows,cols); + ComplexEigenSolver eiz(z); + VERIFY((eiz.eigenvalues().cwiseEqual(0)).all()); + + MatrixType id = MatrixType::Identity(rows, cols); + VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); + + if (rows > 1 && rows < 20) + { + // Test matrix with NaN + a(0,0) = std::numeric_limits::quiet_NaN(); + ComplexEigenSolver eiNaN(a); + VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); + } + + // regression test for bug 1098 + { + ComplexEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } +} + +template void eigensolver_verify_assert(const MatrixType& m) +{ + ComplexEigenSolver eig; + VERIFY_RAISES_ASSERT(eig.eigenvectors()); + VERIFY_RAISES_ASSERT(eig.eigenvalues()); + + MatrixType a = MatrixType::Random(m.rows(),m.cols()); + eig.compute(a, false); + VERIFY_RAISES_ASSERT(eig.eigenvectors()); +} + +void test_eigensolver_complex() +{ + int s = 0; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( eigensolver(Matrix4cf()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_2( eigensolver(MatrixXcd(s,s)) ); + CALL_SUBTEST_3( eigensolver(Matrix, 1, 1>()) ); + CALL_SUBTEST_4( eigensolver(Matrix3f()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } + CALL_SUBTEST_1( eigensolver_verify_assert(Matrix4cf()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_2( eigensolver_verify_assert(MatrixXcd(s,s)) ); + CALL_SUBTEST_3( eigensolver_verify_assert(Matrix, 1, 1>()) ); + CALL_SUBTEST_4( eigensolver_verify_assert(Matrix3f()) ); + + // Test problem size constructors + CALL_SUBTEST_5(ComplexEigenSolver tmp(s)); + + TEST_SET_BUT_UNUSED_VARIABLE(s) +} diff --git a/thirdparty/eigen/test/eigensolver_generalized_real.cpp b/thirdparty/eigen/test/eigensolver_generalized_real.cpp new file mode 100644 index 000000000..9c0838ba4 --- /dev/null +++ 
b/thirdparty/eigen/test/eigensolver_generalized_real.cpp @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_RUNTIME_NO_MALLOC +#include "main.h" +#include +#include +#include + +template void generalized_eigensolver_real(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + /* this test covers the following files: + GeneralizedEigenSolver.h + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef std::complex ComplexScalar; + typedef Matrix VectorType; + + MatrixType a = MatrixType::Random(rows,cols); + MatrixType b = MatrixType::Random(rows,cols); + MatrixType a1 = MatrixType::Random(rows,cols); + MatrixType b1 = MatrixType::Random(rows,cols); + MatrixType spdA = a.adjoint() * a + a1.adjoint() * a1; + MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1; + + // lets compare to GeneralizedSelfAdjointEigenSolver + { + GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); + GeneralizedEigenSolver eig(spdA, spdB); + + VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); + + VectorType realEigenvalues = eig.eigenvalues().real(); + std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); + VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + + // check eigenvectors + typename GeneralizedEigenSolver::EigenvectorsType D = eig.eigenvalues().asDiagonal(); + typename GeneralizedEigenSolver::EigenvectorsType V = eig.eigenvectors(); + VERIFY_IS_APPROX(spdA*V, spdB*V*D); + } + + // non symmetric case: + { + GeneralizedEigenSolver eig(rows); + // TODO enable full-prealocation of required memory, this probably requires an in-place mode 
for HessenbergDecomposition + //Eigen::internal::set_is_malloc_allowed(false); + eig.compute(a,b); + //Eigen::internal::set_is_malloc_allowed(true); + for(Index k=0; k tmp = (eig.betas()(k)*a).template cast() - eig.alphas()(k)*b; + if(tmp.size()>1 && tmp.norm()>(std::numeric_limits::min)()) + tmp /= tmp.norm(); + VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) ); + } + // check eigenvectors + typename GeneralizedEigenSolver::EigenvectorsType D = eig.eigenvalues().asDiagonal(); + typename GeneralizedEigenSolver::EigenvectorsType V = eig.eigenvectors(); + VERIFY_IS_APPROX(a*V, b*V*D); + } + + // regression test for bug 1098 + { + GeneralizedSelfAdjointEigenSolver eig1(a.adjoint() * a,b.adjoint() * b); + eig1.compute(a.adjoint() * a,b.adjoint() * b); + GeneralizedEigenSolver eig2(a.adjoint() * a,b.adjoint() * b); + eig2.compute(a.adjoint() * a,b.adjoint() * b); + } +} + +void test_eigensolver_generalized_real() +{ + for(int i = 0; i < g_repeat; i++) { + int s = 0; + CALL_SUBTEST_1( generalized_eigensolver_real(Matrix4f()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) ); + + // some trivial but implementation-wise special cases + CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) ); + CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); + CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); + CALL_SUBTEST_4( generalized_eigensolver_real(Matrix2d()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } +} diff --git a/thirdparty/eigen/test/eigensolver_generic.cpp b/thirdparty/eigen/test/eigensolver_generic.cpp new file mode 100644 index 000000000..e18fbf687 --- /dev/null +++ b/thirdparty/eigen/test/eigensolver_generic.cpp @@ -0,0 +1,157 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2010,2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include + +template void eigensolver(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + /* this test covers the following files: + EigenSolver.h + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix RealVectorType; + typedef typename std::complex::Real> Complex; + + MatrixType a = MatrixType::Random(rows,cols); + MatrixType a1 = MatrixType::Random(rows,cols); + MatrixType symmA = a.adjoint() * a + a1.adjoint() * a1; + + EigenSolver ei0(symmA); + VERIFY_IS_EQUAL(ei0.info(), Success); + VERIFY_IS_APPROX(symmA * ei0.pseudoEigenvectors(), ei0.pseudoEigenvectors() * ei0.pseudoEigenvalueMatrix()); + VERIFY_IS_APPROX((symmA.template cast()) * (ei0.pseudoEigenvectors().template cast()), + (ei0.pseudoEigenvectors().template cast()) * (ei0.eigenvalues().asDiagonal())); + + EigenSolver ei1(a); + VERIFY_IS_EQUAL(ei1.info(), Success); + VERIFY_IS_APPROX(a * ei1.pseudoEigenvectors(), ei1.pseudoEigenvectors() * ei1.pseudoEigenvalueMatrix()); + VERIFY_IS_APPROX(a.template cast() * ei1.eigenvectors(), + ei1.eigenvectors() * ei1.eigenvalues().asDiagonal()); + VERIFY_IS_APPROX(ei1.eigenvectors().colwise().norm(), RealVectorType::Ones(rows).transpose()); + VERIFY_IS_APPROX(a.eigenvalues(), ei1.eigenvalues()); + + EigenSolver ei2; + ei2.setMaxIterations(RealSchur::m_maxIterationsPerRow * rows).compute(a); + VERIFY_IS_EQUAL(ei2.info(), Success); + VERIFY_IS_EQUAL(ei2.eigenvectors(), ei1.eigenvectors()); + VERIFY_IS_EQUAL(ei2.eigenvalues(), ei1.eigenvalues()); + if (rows > 2) { + ei2.setMaxIterations(1).compute(a); + 
VERIFY_IS_EQUAL(ei2.info(), NoConvergence); + VERIFY_IS_EQUAL(ei2.getMaxIterations(), 1); + } + + EigenSolver eiNoEivecs(a, false); + VERIFY_IS_EQUAL(eiNoEivecs.info(), Success); + VERIFY_IS_APPROX(ei1.eigenvalues(), eiNoEivecs.eigenvalues()); + VERIFY_IS_APPROX(ei1.pseudoEigenvalueMatrix(), eiNoEivecs.pseudoEigenvalueMatrix()); + + MatrixType id = MatrixType::Identity(rows, cols); + VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); + + if (rows > 2 && rows < 20) + { + // Test matrix with NaN + a(0,0) = std::numeric_limits::quiet_NaN(); + EigenSolver eiNaN(a); + VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); + } + + // regression test for bug 1098 + { + EigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } +} + +template void eigensolver_verify_assert(const MatrixType& m) +{ + EigenSolver eig; + VERIFY_RAISES_ASSERT(eig.eigenvectors()); + VERIFY_RAISES_ASSERT(eig.pseudoEigenvectors()); + VERIFY_RAISES_ASSERT(eig.pseudoEigenvalueMatrix()); + VERIFY_RAISES_ASSERT(eig.eigenvalues()); + + MatrixType a = MatrixType::Random(m.rows(),m.cols()); + eig.compute(a, false); + VERIFY_RAISES_ASSERT(eig.eigenvectors()); + VERIFY_RAISES_ASSERT(eig.pseudoEigenvectors()); +} + +void test_eigensolver_generic() +{ + int s = 0; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( eigensolver(Matrix4f()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_2( eigensolver(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + // some trivial but implementation-wise tricky cases + CALL_SUBTEST_2( eigensolver(MatrixXd(1,1)) ); + CALL_SUBTEST_2( eigensolver(MatrixXd(2,2)) ); + CALL_SUBTEST_3( eigensolver(Matrix()) ); + CALL_SUBTEST_4( eigensolver(Matrix2d()) ); + } + + CALL_SUBTEST_1( eigensolver_verify_assert(Matrix4f()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_2( eigensolver_verify_assert(MatrixXd(s,s)) ); + CALL_SUBTEST_3( eigensolver_verify_assert(Matrix()) ); + CALL_SUBTEST_4( eigensolver_verify_assert(Matrix2d()) ); + + // 
Test problem size constructors + CALL_SUBTEST_5(EigenSolver tmp(s)); + + // regression test for bug 410 + CALL_SUBTEST_2( + { + MatrixXd A(1,1); + A(0,0) = std::sqrt(-1.); // is Not-a-Number + Eigen::EigenSolver solver(A); + VERIFY_IS_EQUAL(solver.info(), NumericalIssue); + } + ); + +#ifdef EIGEN_TEST_PART_2 + { + // regression test for bug 793 + MatrixXd a(3,3); + a << 0, 0, 1, + 1, 1, 1, + 1, 1e+200, 1; + Eigen::EigenSolver eig(a); + double scale = 1e-200; // scale to avoid overflow during the comparisons + VERIFY_IS_APPROX(a * eig.pseudoEigenvectors()*scale, eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()*scale); + VERIFY_IS_APPROX(a * eig.eigenvectors()*scale, eig.eigenvectors() * eig.eigenvalues().asDiagonal()*scale); + } + { + // check a case where all eigenvalues are null. + MatrixXd a(2,2); + a << 1, 1, + -1, -1; + Eigen::EigenSolver eig(a); + VERIFY_IS_APPROX(eig.pseudoEigenvectors().squaredNorm(), 2.); + VERIFY_IS_APPROX((a * eig.pseudoEigenvectors()).norm()+1., 1.); + VERIFY_IS_APPROX((eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()).norm()+1., 1.); + VERIFY_IS_APPROX((a * eig.eigenvectors()).norm()+1., 1.); + VERIFY_IS_APPROX((eig.eigenvectors() * eig.eigenvalues().asDiagonal()).norm()+1., 1.); + } +#endif + + TEST_SET_BUT_UNUSED_VARIABLE(s) +} diff --git a/thirdparty/eigen/test/eigensolver_selfadjoint.cpp b/thirdparty/eigen/test/eigensolver_selfadjoint.cpp new file mode 100644 index 000000000..4ed126116 --- /dev/null +++ b/thirdparty/eigen/test/eigensolver_selfadjoint.cpp @@ -0,0 +1,265 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2010 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include "svd_fill.h" +#include +#include +#include + + +template void selfadjointeigensolver_essential_check(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + RealScalar eival_eps = numext::mini(test_precision(), NumTraits::dummy_precision()*20000); + + SelfAdjointEigenSolver eiSymm(m); + VERIFY_IS_EQUAL(eiSymm.info(), Success); + + RealScalar scaling = m.cwiseAbs().maxCoeff(); + + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(eiSymm.eigenvalues().cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX((m.template selfadjointView() * eiSymm.eigenvectors())/scaling, + (eiSymm.eigenvectors() * eiSymm.eigenvalues().asDiagonal())/scaling); + } + VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues(), eiSymm.eigenvalues()); + VERIFY_IS_UNITARY(eiSymm.eigenvectors()); + + if(m.cols()<=4) + { + SelfAdjointEigenSolver eiDirect; + eiDirect.computeDirect(m); + VERIFY_IS_EQUAL(eiDirect.info(), Success); + if(! 
eiSymm.eigenvalues().isApprox(eiDirect.eigenvalues(), eival_eps) ) + { + std::cerr << "reference eigenvalues: " << eiSymm.eigenvalues().transpose() << "\n" + << "obtained eigenvalues: " << eiDirect.eigenvalues().transpose() << "\n" + << "diff: " << (eiSymm.eigenvalues()-eiDirect.eigenvalues()).transpose() << "\n" + << "error (eps): " << (eiSymm.eigenvalues()-eiDirect.eigenvalues()).norm() / eiSymm.eigenvalues().norm() << " (" << eival_eps << ")\n"; + } + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(eiDirect.eigenvalues().cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX(eiSymm.eigenvalues()/scaling, eiDirect.eigenvalues()/scaling); + VERIFY_IS_APPROX((m.template selfadjointView() * eiDirect.eigenvectors())/scaling, + (eiDirect.eigenvectors() * eiDirect.eigenvalues().asDiagonal())/scaling); + VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues()/scaling, eiDirect.eigenvalues()/scaling); + } + + VERIFY_IS_UNITARY(eiDirect.eigenvectors()); + } +} + +template void selfadjointeigensolver(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + /* this test covers the following files: + EigenSolver.h, SelfAdjointEigenSolver.h (and indirectly: Tridiagonalization.h) + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + RealScalar largerEps = 10*test_precision(); + + MatrixType a = MatrixType::Random(rows,cols); + MatrixType a1 = MatrixType::Random(rows,cols); + MatrixType symmA = a.adjoint() * a + a1.adjoint() * a1; + MatrixType symmC = symmA; + + svd_fill_random(symmA,Symmetric); + + symmA.template triangularView().setZero(); + symmC.template triangularView().setZero(); + + MatrixType b = MatrixType::Random(rows,cols); + MatrixType b1 = MatrixType::Random(rows,cols); + MatrixType symmB = b.adjoint() * b + b1.adjoint() * b1; + symmB.template triangularView().setZero(); + + CALL_SUBTEST( 
selfadjointeigensolver_essential_check(symmA) ); + + SelfAdjointEigenSolver eiSymm(symmA); + // generalized eigen pb + GeneralizedSelfAdjointEigenSolver eiSymmGen(symmC, symmB); + + SelfAdjointEigenSolver eiSymmNoEivecs(symmA, false); + VERIFY_IS_EQUAL(eiSymmNoEivecs.info(), Success); + VERIFY_IS_APPROX(eiSymm.eigenvalues(), eiSymmNoEivecs.eigenvalues()); + + // generalized eigen problem Ax = lBx + eiSymmGen.compute(symmC, symmB,Ax_lBx); + VERIFY_IS_EQUAL(eiSymmGen.info(), Success); + VERIFY((symmC.template selfadjointView() * eiSymmGen.eigenvectors()).isApprox( + symmB.template selfadjointView() * (eiSymmGen.eigenvectors() * eiSymmGen.eigenvalues().asDiagonal()), largerEps)); + + // generalized eigen problem BAx = lx + eiSymmGen.compute(symmC, symmB,BAx_lx); + VERIFY_IS_EQUAL(eiSymmGen.info(), Success); + VERIFY((symmB.template selfadjointView() * (symmC.template selfadjointView() * eiSymmGen.eigenvectors())).isApprox( + (eiSymmGen.eigenvectors() * eiSymmGen.eigenvalues().asDiagonal()), largerEps)); + + // generalized eigen problem ABx = lx + eiSymmGen.compute(symmC, symmB,ABx_lx); + VERIFY_IS_EQUAL(eiSymmGen.info(), Success); + VERIFY((symmC.template selfadjointView() * (symmB.template selfadjointView() * eiSymmGen.eigenvectors())).isApprox( + (eiSymmGen.eigenvectors() * eiSymmGen.eigenvalues().asDiagonal()), largerEps)); + + + eiSymm.compute(symmC); + MatrixType sqrtSymmA = eiSymm.operatorSqrt(); + VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), sqrtSymmA*sqrtSymmA); + VERIFY_IS_APPROX(sqrtSymmA, symmC.template selfadjointView()*eiSymm.operatorInverseSqrt()); + + MatrixType id = MatrixType::Identity(rows, cols); + VERIFY_IS_APPROX(id.template selfadjointView().operatorNorm(), RealScalar(1)); + + SelfAdjointEigenSolver eiSymmUninitialized; + VERIFY_RAISES_ASSERT(eiSymmUninitialized.info()); + VERIFY_RAISES_ASSERT(eiSymmUninitialized.eigenvalues()); + VERIFY_RAISES_ASSERT(eiSymmUninitialized.eigenvectors()); + 
VERIFY_RAISES_ASSERT(eiSymmUninitialized.operatorSqrt()); + VERIFY_RAISES_ASSERT(eiSymmUninitialized.operatorInverseSqrt()); + + eiSymmUninitialized.compute(symmA, false); + VERIFY_RAISES_ASSERT(eiSymmUninitialized.eigenvectors()); + VERIFY_RAISES_ASSERT(eiSymmUninitialized.operatorSqrt()); + VERIFY_RAISES_ASSERT(eiSymmUninitialized.operatorInverseSqrt()); + + // test Tridiagonalization's methods + Tridiagonalization tridiag(symmC); + VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal()); + VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>()); + Matrix T = tridiag.matrixT(); + if(rows>1 && cols>1) { + // FIXME check that upper and lower part are 0: + //VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView().isZero()); + } + VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal()); + VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>()); + VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint()); + VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint()); + + // Test computation of eigenvalues from tridiagonal matrix + if(rows > 1) + { + SelfAdjointEigenSolver eiSymmTridiag; + eiSymmTridiag.computeFromTridiagonal(tridiag.matrixT().diagonal(), tridiag.matrixT().diagonal(-1), ComputeEigenvectors); + VERIFY_IS_APPROX(eiSymm.eigenvalues(), eiSymmTridiag.eigenvalues()); + VERIFY_IS_APPROX(tridiag.matrixT(), eiSymmTridiag.eigenvectors().real() * eiSymmTridiag.eigenvalues().asDiagonal() * eiSymmTridiag.eigenvectors().real().transpose()); + } + + if (rows > 1 && rows < 20) + { + // Test matrix with NaN + symmC(0,0) = std::numeric_limits::quiet_NaN(); + SelfAdjointEigenSolver eiSymmNaN(symmC); + VERIFY_IS_EQUAL(eiSymmNaN.info(), NoConvergence); + } + + // regression test for bug 1098 + { + SelfAdjointEigenSolver eig(a.adjoint() * a); + 
eig.compute(a.adjoint() * a); + } +} + +template +void bug_854() +{ + Matrix3d m; + m << 850.961, 51.966, 0, + 51.966, 254.841, 0, + 0, 0, 0; + selfadjointeigensolver_essential_check(m); +} + +template +void bug_1014() +{ + Matrix3d m; + m << 0.11111111111111114658, 0, 0, + 0, 0.11111111111111109107, 0, + 0, 0, 0.11111111111111107719; + selfadjointeigensolver_essential_check(m); +} + +template +void bug_1225() +{ + Matrix3d m1, m2; + m1.setRandom(); + m1 = m1*m1.transpose(); + m2 = m1.triangularView(); + SelfAdjointEigenSolver eig1(m1); + SelfAdjointEigenSolver eig2(m2.selfadjointView()); + VERIFY_IS_APPROX(eig1.eigenvalues(), eig2.eigenvalues()); +} + +template +void bug_1204() +{ + SparseMatrix A(2,2); + A.setIdentity(); + SelfAdjointEigenSolver > eig(A); +} + +void test_eigensolver_selfadjoint() +{ + int s = 0; + for(int i = 0; i < g_repeat; i++) { + // trivial test for 1x1 matrices: + CALL_SUBTEST_1( selfadjointeigensolver(Matrix())); + CALL_SUBTEST_1( selfadjointeigensolver(Matrix())); + // very important to test 3x3 and 2x2 matrices since we provide special paths for them + CALL_SUBTEST_12( selfadjointeigensolver(Matrix2f()) ); + CALL_SUBTEST_12( selfadjointeigensolver(Matrix2d()) ); + CALL_SUBTEST_13( selfadjointeigensolver(Matrix3f()) ); + CALL_SUBTEST_13( selfadjointeigensolver(Matrix3d()) ); + CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) ); + + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) ); + CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(s,s)) ); + CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(s,s)) ); + CALL_SUBTEST_9( selfadjointeigensolver(Matrix,Dynamic,Dynamic,RowMajor>(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + // some trivial but implementation-wise tricky cases + CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(1,1)) ); + CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(2,2)) ); + CALL_SUBTEST_6( selfadjointeigensolver(Matrix()) ); + CALL_SUBTEST_7( 
selfadjointeigensolver(Matrix()) ); + } + + CALL_SUBTEST_13( bug_854<0>() ); + CALL_SUBTEST_13( bug_1014<0>() ); + CALL_SUBTEST_13( bug_1204<0>() ); + CALL_SUBTEST_13( bug_1225<0>() ); + + // Test problem size constructors + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); + CALL_SUBTEST_8(SelfAdjointEigenSolver tmp1(s)); + CALL_SUBTEST_8(Tridiagonalization tmp2(s)); + + TEST_SET_BUT_UNUSED_VARIABLE(s) +} + diff --git a/thirdparty/eigen/test/evaluator_common.h b/thirdparty/eigen/test/evaluator_common.h new file mode 100644 index 000000000..e69de29bb diff --git a/thirdparty/eigen/test/evaluators.cpp b/thirdparty/eigen/test/evaluators.cpp new file mode 100644 index 000000000..aed5a05a7 --- /dev/null +++ b/thirdparty/eigen/test/evaluators.cpp @@ -0,0 +1,499 @@ + +#include "main.h" + +namespace Eigen { + + template + const Product + prod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } + + template + const Product + lazyprod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } + + template + EIGEN_STRONG_INLINE + DstXprType& copy_using_evaluator(const EigenBase &dst, const SrcXprType &src) + { + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + return dst.const_cast_derived(); + } + + template class StorageBase, typename SrcXprType> + EIGEN_STRONG_INLINE + const DstXprType& copy_using_evaluator(const NoAlias& dst, const SrcXprType &src) + { + call_assignment(dst, src.derived(), internal::assign_op()); + return dst.expression(); + } + + template + EIGEN_STRONG_INLINE + DstXprType& copy_using_evaluator(const PlainObjectBase &dst, const SrcXprType &src) + { + #ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) + : (dst.rows() == src.rows() && dst.cols() == src.cols()))) + && "Size mismatch. 
Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + #else + dst.const_cast_derived().resizeLike(src.derived()); + #endif + + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + return dst.const_cast_derived(); + } + + template + void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); + } + + template + void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); + } + + template + void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); + } + + template + void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); + } + + template + void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(dst.const_cast_derived(), src.const_cast_derived(), internal::swap_assign_op()); + } + + namespace internal { + template class StorageBase, typename Src, typename Func> + EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func) + { + call_assignment_no_alias(dst.expression(), src, func); + } + } + +} + +template long get_cost(const XprType& ) { return Eigen::internal::evaluator::CoeffReadCost; } + +using namespace std; + +#define VERIFY_IS_APPROX_EVALUATOR(DEST,EXPR) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (EXPR).eval()); +#define VERIFY_IS_APPROX_EVALUATOR2(DEST,EXPR,REF) 
VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (REF).eval()); + +void test_evaluators() +{ + // Testing Matrix evaluator and Transpose + Vector2d v = Vector2d::Random(); + const Vector2d v_const(v); + Vector2d v2; + RowVector2d w; + + VERIFY_IS_APPROX_EVALUATOR(v2, v); + VERIFY_IS_APPROX_EVALUATOR(v2, v_const); + + // Testing Transpose + VERIFY_IS_APPROX_EVALUATOR(w, v.transpose()); // Transpose as rvalue + VERIFY_IS_APPROX_EVALUATOR(w, v_const.transpose()); + + copy_using_evaluator(w.transpose(), v); // Transpose as lvalue + VERIFY_IS_APPROX(w,v.transpose().eval()); + + copy_using_evaluator(w.transpose(), v_const); + VERIFY_IS_APPROX(w,v_const.transpose().eval()); + + // Testing Array evaluator + { + ArrayXXf a(2,3); + ArrayXXf b(3,2); + a << 1,2,3, 4,5,6; + const ArrayXXf a_const(a); + + VERIFY_IS_APPROX_EVALUATOR(b, a.transpose()); + + VERIFY_IS_APPROX_EVALUATOR(b, a_const.transpose()); + + // Testing CwiseNullaryOp evaluator + copy_using_evaluator(w, RowVector2d::Random()); + VERIFY((w.array() >= -1).all() && (w.array() <= 1).all()); // not easy to test ... 
+ + VERIFY_IS_APPROX_EVALUATOR(w, RowVector2d::Zero()); + + VERIFY_IS_APPROX_EVALUATOR(w, RowVector2d::Constant(3)); + + // mix CwiseNullaryOp and transpose + VERIFY_IS_APPROX_EVALUATOR(w, Vector2d::Zero().transpose()); + } + + { + // test product expressions + int s = internal::random(1,100); + MatrixXf a(s,s), b(s,s), c(s,s), d(s,s); + a.setRandom(); + b.setRandom(); + c.setRandom(); + d.setRandom(); + VERIFY_IS_APPROX_EVALUATOR(d, (a + b)); + VERIFY_IS_APPROX_EVALUATOR(d, (a + b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b), a*b); + VERIFY_IS_APPROX_EVALUATOR2(d.noalias(), prod(a,b), a*b); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + c, a*b + c); + VERIFY_IS_APPROX_EVALUATOR2(d, s * prod(a,b), s * a*b); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b).transpose(), (a*b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + prod(b,c), a*b + b*c); + + // check that prod works even with aliasing present + c = a*a; + copy_using_evaluator(a, prod(a,a)); + VERIFY_IS_APPROX(a,c); + + // check compound assignment of products + d = c; + add_assign_using_evaluator(c.noalias(), prod(a,b)); + d.noalias() += a*b; + VERIFY_IS_APPROX(c, d); + + d = c; + subtract_assign_using_evaluator(c.noalias(), prod(a,b)); + d.noalias() -= a*b; + VERIFY_IS_APPROX(c, d); + } + + { + // test product with all possible sizes + int s = internal::random(1,100); + Matrix m11, res11; m11.setRandom(1,1); + Matrix m14, res14; m14.setRandom(1,4); + Matrix m1X, res1X; m1X.setRandom(1,s); + Matrix m41, res41; m41.setRandom(4,1); + Matrix m44, res44; m44.setRandom(4,4); + Matrix m4X, res4X; m4X.setRandom(4,s); + Matrix mX1, resX1; mX1.setRandom(s,1); + Matrix mX4, resX4; mX4.setRandom(s,4); + Matrix mXX, resXX; mXX.setRandom(s,s); + + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m11,m11), m11*m11); + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m14,m41), m14*m41); + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m1X,mX1), m1X*mX1); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m11,m14), m11*m14); + 
VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m14,m44), m14*m44); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m1X,mX4), m1X*mX4); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m11,m1X), m11*m1X); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m14,m4X), m14*m4X); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m1X,mXX), m1X*mXX); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m41,m11), m41*m11); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m44,m41), m44*m41); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m4X,mX1), m4X*mX1); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m41,m14), m41*m14); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m44,m44), m44*m44); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m4X,mX4), m4X*mX4); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m41,m1X), m41*m1X); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m44,m4X), m44*m4X); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m4X,mXX), m4X*mXX); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX1,m11), mX1*m11); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX4,m41), mX4*m41); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mXX,mX1), mXX*mX1); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX1,m14), mX1*m14); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX4,m44), mX4*m44); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mXX,mX4), mXX*mX4); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX1,m1X), mX1*m1X); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX4,m4X), mX4*m4X); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mXX,mXX), mXX*mXX); + } + + { + ArrayXXf a(2,3); + ArrayXXf b(3,2); + a << 1,2,3, 4,5,6; + const ArrayXXf a_const(a); + + // this does not work because Random is eval-before-nested: + // copy_using_evaluator(w, Vector2d::Random().transpose()); + + // test CwiseUnaryOp + VERIFY_IS_APPROX_EVALUATOR(v2, 3 * v); + VERIFY_IS_APPROX_EVALUATOR(w, (3 * v).transpose()); + VERIFY_IS_APPROX_EVALUATOR(b, (a + 3).transpose()); + VERIFY_IS_APPROX_EVALUATOR(b, (2 * a_const + 3).transpose()); + + // test CwiseBinaryOp + VERIFY_IS_APPROX_EVALUATOR(v2, v + Vector2d::Ones()); + VERIFY_IS_APPROX_EVALUATOR(w, 
(v + Vector2d::Ones()).transpose().cwiseProduct(RowVector2d::Constant(3))); + + // dynamic matrices and arrays + MatrixXd mat1(6,6), mat2(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat1, MatrixXd::Identity(6,6)); + VERIFY_IS_APPROX_EVALUATOR(mat2, mat1); + copy_using_evaluator(mat2.transpose(), mat1); + VERIFY_IS_APPROX(mat2.transpose(), mat1); + + ArrayXXd arr1(6,6), arr2(6,6); + VERIFY_IS_APPROX_EVALUATOR(arr1, ArrayXXd::Constant(6,6, 3.0)); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1); + + // test automatic resizing + mat2.resize(3,3); + VERIFY_IS_APPROX_EVALUATOR(mat2, mat1); + arr2.resize(9,9); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1); + + // test direct traversal + Matrix3f m3; + Array33f a3; + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity()); // matrix, nullary + // TODO: find a way to test direct traversal with array + VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity()); // unary + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + Matrix3f::Zero()); // binary + VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2)); // block + + // test linear traversal + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero()); // matrix, nullary + VERIFY_IS_APPROX_EVALUATOR(a3, Array33f::Zero()); // array + VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Zero().transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Zero()); // unary + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero() + m3); // binary + + // test inner vectorization + Matrix4f m4, m4src = Matrix4f::Random(); + Array44f a4, a4src = Matrix4f::Random(); + VERIFY_IS_APPROX_EVALUATOR(m4, m4src); // matrix + VERIFY_IS_APPROX_EVALUATOR(a4, a4src); // array + VERIFY_IS_APPROX_EVALUATOR(m4.transpose(), m4src.transpose()); // transpose + // TODO: find out why Matrix4f::Zero() does not allow inner vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, 2 * m4src); // unary + 
VERIFY_IS_APPROX_EVALUATOR(m4, m4src + m4src); // binary + + // test linear vectorization + MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6); + ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6); + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc); // matrix + VERIFY_IS_APPROX_EVALUATOR(aX, aXsrc); // array + VERIFY_IS_APPROX_EVALUATOR(mX.transpose(), mXsrc.transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(mX, MatrixXf::Zero(6,6)); // nullary + VERIFY_IS_APPROX_EVALUATOR(mX, 2 * mXsrc); // unary + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc + mXsrc); // binary + + // test blocks and slice vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, (mXsrc.block<4,4>(1,0))); + VERIFY_IS_APPROX_EVALUATOR(aX, ArrayXXf::Constant(10, 10, 3.0).block(2, 3, 6, 6)); + + Matrix4f m4ref = m4; + copy_using_evaluator(m4.block(1, 1, 2, 3), m3.bottomRows(2)); + m4ref.block(1, 1, 2, 3) = m3.bottomRows(2); + VERIFY_IS_APPROX(m4, m4ref); + + mX.setIdentity(20,20); + MatrixXf mXref = MatrixXf::Identity(20,20); + mXsrc = MatrixXf::Random(9,12); + copy_using_evaluator(mX.block(4, 4, 9, 12), mXsrc); + mXref.block(4, 4, 9, 12) = mXsrc; + VERIFY_IS_APPROX(mX, mXref); + + // test Map + const float raw[3] = {1,2,3}; + float buffer[3] = {0,0,0}; + Vector3f v3; + Array3f a3f; + VERIFY_IS_APPROX_EVALUATOR(v3, Map(raw)); + VERIFY_IS_APPROX_EVALUATOR(a3f, Map(raw)); + Vector3f::Map(buffer) = 2*v3; + VERIFY(buffer[0] == 2); + VERIFY(buffer[1] == 4); + VERIFY(buffer[2] == 6); + + // test CwiseUnaryView + mat1.setRandom(); + mat2.setIdentity(); + MatrixXcd matXcd(6,6), matXcd_ref(6,6); + copy_using_evaluator(matXcd.real(), mat1); + copy_using_evaluator(matXcd.imag(), mat2); + matXcd_ref.real() = mat1; + matXcd_ref.imag() = mat2; + VERIFY_IS_APPROX(matXcd, matXcd_ref); + + // test Select + VERIFY_IS_APPROX_EVALUATOR(aX, (aXsrc > 0).select(aXsrc, -aXsrc)); + + // test Replicate + mXsrc = MatrixXf::Random(6, 6); + VectorXf vX = VectorXf::Random(6); + mX.resize(6, 6); + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc.colwise() + vX); + 
matXcd.resize(12, 12); + VERIFY_IS_APPROX_EVALUATOR(matXcd, matXcd_ref.replicate(2,2)); + VERIFY_IS_APPROX_EVALUATOR(matXcd, (matXcd_ref.replicate<2,2>())); + + // test partial reductions + VectorXd vec1(6); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.rowwise().sum()); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.colwise().sum().transpose()); + + // test MatrixWrapper and ArrayWrapper + mat1.setRandom(6,6); + arr1.setRandom(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat2, arr1.matrix()); + VERIFY_IS_APPROX_EVALUATOR(arr2, mat1.array()); + VERIFY_IS_APPROX_EVALUATOR(mat2, (arr1 + 2).matrix()); + VERIFY_IS_APPROX_EVALUATOR(arr2, mat1.array() + 2); + mat2.array() = arr1 * arr1; + VERIFY_IS_APPROX(mat2, (arr1 * arr1).matrix()); + arr2.matrix() = MatrixXd::Identity(6,6); + VERIFY_IS_APPROX(arr2, MatrixXd::Identity(6,6).array()); + + // test Reverse + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.reverse()); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.colwise().reverse()); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.rowwise().reverse()); + arr2.reverse() = arr1; + VERIFY_IS_APPROX(arr2, arr1.reverse()); + mat2.array() = mat1.array().reverse(); + VERIFY_IS_APPROX(mat2.array(), mat1.array().reverse()); + + // test Diagonal + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal()); + vec1.resize(5); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal(1)); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal<-1>()); + vec1.setRandom(); + + mat2 = mat1; + copy_using_evaluator(mat1.diagonal(1), vec1); + mat2.diagonal(1) = vec1; + VERIFY_IS_APPROX(mat1, mat2); + + copy_using_evaluator(mat1.diagonal<-1>(), mat1.diagonal(1)); + mat2.diagonal<-1>() = mat2.diagonal(1); + VERIFY_IS_APPROX(mat1, mat2); + } + + { + // test swapping + MatrixXd mat1, mat2, mat1ref, mat2ref; + mat1ref = mat1 = MatrixXd::Random(6, 6); + mat2ref = mat2 = 2 * mat1 + MatrixXd::Identity(6, 6); + swap_using_evaluator(mat1, mat2); + mat1ref.swap(mat2ref); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + + 
swap_using_evaluator(mat1.block(0, 0, 3, 3), mat2.block(3, 3, 3, 3)); + mat1ref.block(0, 0, 3, 3).swap(mat2ref.block(3, 3, 3, 3)); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + + swap_using_evaluator(mat1.row(2), mat2.col(3).transpose()); + mat1.row(2).swap(mat2.col(3).transpose()); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + } + + { + // test compound assignment + const Matrix4d mat_const = Matrix4d::Random(); + Matrix4d mat, mat_ref; + mat = mat_ref = Matrix4d::Identity(); + add_assign_using_evaluator(mat, mat_const); + mat_ref += mat_const; + VERIFY_IS_APPROX(mat, mat_ref); + + subtract_assign_using_evaluator(mat.row(1), 2*mat.row(2)); + mat_ref.row(1) -= 2*mat_ref.row(2); + VERIFY_IS_APPROX(mat, mat_ref); + + const ArrayXXf arr_const = ArrayXXf::Random(5,3); + ArrayXXf arr, arr_ref; + arr = arr_ref = ArrayXXf::Constant(5, 3, 0.5); + multiply_assign_using_evaluator(arr, arr_const); + arr_ref *= arr_const; + VERIFY_IS_APPROX(arr, arr_ref); + + divide_assign_using_evaluator(arr.row(1), arr.row(2) + 1); + arr_ref.row(1) /= (arr_ref.row(2) + 1); + VERIFY_IS_APPROX(arr, arr_ref); + } + + { + // test triangular shapes + MatrixXd A = MatrixXd::Random(6,6), B(6,6), C(6,6), D(6,6); + A.setRandom();B.setRandom(); + VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView(), MatrixXd(A.triangularView())); + + A.setRandom();B.setRandom(); + VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView(), MatrixXd(A.triangularView())); + + A.setRandom();B.setRandom(); + VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView(), MatrixXd(A.triangularView())); + + A.setRandom();B.setRandom(); + C = B; C.triangularView() = A; + copy_using_evaluator(B.triangularView(), A); + VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView(), A)"); + + A.setRandom();B.setRandom(); + C = B; C.triangularView() = A.triangularView(); + copy_using_evaluator(B.triangularView(), A.triangularView()); + VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView(), 
A.triangularView())"); + + + A.setRandom();B.setRandom(); + C = B; C.triangularView() = A.triangularView().transpose(); + copy_using_evaluator(B.triangularView(), A.triangularView().transpose()); + VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView(), A.triangularView().transpose())"); + + + A.setRandom();B.setRandom(); C = B; D = A; + C.triangularView().swap(D.triangularView()); + swap_using_evaluator(B.triangularView(), A.triangularView()); + VERIFY(B.isApprox(C) && "swap_using_evaluator(B.triangularView(), A.triangularView())"); + + + VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.triangularView(),A), MatrixXd(A.triangularView()*A)); + + VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.selfadjointView(),A), MatrixXd(A.selfadjointView()*A)); + } + + { + // test diagonal shapes + VectorXd d = VectorXd::Random(6); + MatrixXd A = MatrixXd::Random(6,6), B(6,6); + A.setRandom();B.setRandom(); + + VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(d.asDiagonal(),A), MatrixXd(d.asDiagonal()*A)); + VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(A,d.asDiagonal()), MatrixXd(A*d.asDiagonal())); + } + + { + // test CoeffReadCost + Matrix4d a, b; + VERIFY_IS_EQUAL( get_cost(a), 1 ); + VERIFY_IS_EQUAL( get_cost(a+b), 3); + VERIFY_IS_EQUAL( get_cost(2*a+b), 4); + VERIFY_IS_EQUAL( get_cost(a*b), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a*b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a*b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15); + } +} diff --git a/thirdparty/eigen/test/exceptions.cpp b/thirdparty/eigen/test/exceptions.cpp new file mode 100644 index 000000000..b83fb82ba --- /dev/null +++ b/thirdparty/eigen/test/exceptions.cpp @@ -0,0 +1,113 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +// Various sanity tests with exceptions: +// - no memory leak when a custom scalar type trow an exceptions +// - todo: complete the list of tests! + +#define EIGEN_STACK_ALLOCATION_LIMIT 100000000 + +#include "main.h" + +struct my_exception +{ + my_exception() {} + ~my_exception() {} +}; + +class ScalarWithExceptions +{ + public: + ScalarWithExceptions() { init(); } + ScalarWithExceptions(const float& _v) { init(); *v = _v; } + ScalarWithExceptions(const ScalarWithExceptions& other) { init(); *v = *(other.v); } + ~ScalarWithExceptions() { + delete v; + instances--; + } + + void init() { + v = new float; + instances++; + } + + ScalarWithExceptions operator+(const ScalarWithExceptions& other) const + { + countdown--; + if(countdown<=0) + throw my_exception(); + return ScalarWithExceptions(*v+*other.v); + } + + ScalarWithExceptions operator-(const ScalarWithExceptions& other) const + { return ScalarWithExceptions(*v-*other.v); } + + ScalarWithExceptions operator*(const ScalarWithExceptions& other) const + { return ScalarWithExceptions((*v)*(*other.v)); } + + ScalarWithExceptions& operator+=(const ScalarWithExceptions& other) + { *v+=*other.v; return *this; } + ScalarWithExceptions& operator-=(const ScalarWithExceptions& other) + { *v-=*other.v; return *this; } + ScalarWithExceptions& operator=(const ScalarWithExceptions& other) + { *v = *(other.v); return *this; } + + bool operator==(const ScalarWithExceptions& other) const + { return *v==*other.v; } + bool operator!=(const ScalarWithExceptions& other) const + { return *v!=*other.v; } + + float* v; + static int instances; + static int countdown; +}; + +ScalarWithExceptions real(const ScalarWithExceptions &x) { return x; } +ScalarWithExceptions imag(const ScalarWithExceptions & ) { return 0; } +ScalarWithExceptions conj(const ScalarWithExceptions &x) { return x; } + +int ScalarWithExceptions::instances = 
0; +int ScalarWithExceptions::countdown = 0; + + +#define CHECK_MEMLEAK(OP) { \ + ScalarWithExceptions::countdown = 100; \ + int before = ScalarWithExceptions::instances; \ + bool exception_thrown = false; \ + try { OP; } \ + catch (my_exception) { \ + exception_thrown = true; \ + VERIFY(ScalarWithExceptions::instances==before && "memory leak detected in " && EIGEN_MAKESTRING(OP)); \ + } \ + VERIFY(exception_thrown && " no exception thrown in " && EIGEN_MAKESTRING(OP)); \ + } + +void memoryleak() +{ + typedef Eigen::Matrix VectorType; + typedef Eigen::Matrix MatrixType; + + { + int n = 50; + VectorType v0(n), v1(n); + MatrixType m0(n,n), m1(n,n), m2(n,n); + v0.setOnes(); v1.setOnes(); + m0.setOnes(); m1.setOnes(); m2.setOnes(); + CHECK_MEMLEAK(v0 = m0 * m1 * v1); + CHECK_MEMLEAK(m2 = m0 * m1 * m2); + CHECK_MEMLEAK((v0+v1).dot(v0+v1)); + } + VERIFY(ScalarWithExceptions::instances==0 && "global memory leak detected in " && EIGEN_MAKESTRING(OP)); \ +} + +void test_exceptions() +{ + CALL_SUBTEST( memoryleak() ); +} diff --git a/thirdparty/eigen/test/fastmath.cpp b/thirdparty/eigen/test/fastmath.cpp new file mode 100644 index 000000000..cc5db0746 --- /dev/null +++ b/thirdparty/eigen/test/fastmath.cpp @@ -0,0 +1,99 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +void check(bool b, bool ref) +{ + std::cout << b; + if(b==ref) + std::cout << " OK "; + else + std::cout << " BAD "; +} + +#if EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1800 +namespace std { + template bool (isfinite)(T x) { return _finite(x); } + template bool (isnan)(T x) { return _isnan(x); } + template bool (isinf)(T x) { return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +} +#endif + +template +void check_inf_nan(bool dryrun) { + Matrix m(10); + m.setRandom(); + m(3) = std::numeric_limits::quiet_NaN(); + + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),true); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), true); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( (numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + T hidden_zero = (std::numeric_limits::min)()*(std::numeric_limits::min)(); + m(4) /= hidden_zero; + if(dryrun) + { + std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(4) << ") = "; check((std::isinf)(m(4)),true); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(4)), true); std::cout << "\n"; + std::cout << "std::isnan(" << m(4) << ") = "; check((std::isnan)(m(4)),false); 
std::cout << " ; numext::isnan = "; check((numext::isnan)(m(4)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(4)) ); + VERIFY( (numext::isinf)(m(4)) ); + VERIFY( !(numext::isnan)(m(4)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(3) = 0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),true); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), true); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 0); std::cout << "\n"; + std::cout << "\n\n"; + } + else + { + VERIFY( (numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( !(numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( !m.hasNaN() ); + } +} + +void test_fastmath() { + std::cout << "*** float *** \n\n"; check_inf_nan(true); + std::cout << "*** double ***\n\n"; check_inf_nan(true); + std::cout << "*** long double *** \n\n"; check_inf_nan(true); + + check_inf_nan(false); + check_inf_nan(false); + check_inf_nan(false); +} diff --git a/thirdparty/eigen/test/first_aligned.cpp b/thirdparty/eigen/test/first_aligned.cpp new file mode 100644 index 000000000..ae2d4bc42 --- /dev/null +++ b/thirdparty/eigen/test/first_aligned.cpp @@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template +void test_first_aligned_helper(Scalar *array, int size) +{ + const int packet_size = sizeof(Scalar) * internal::packet_traits::size; + VERIFY(((size_t(array) + sizeof(Scalar) * internal::first_default_aligned(array, size)) % packet_size) == 0); +} + +template +void test_none_aligned_helper(Scalar *array, int size) +{ + EIGEN_UNUSED_VARIABLE(array); + EIGEN_UNUSED_VARIABLE(size); + VERIFY(internal::packet_traits::size == 1 || internal::first_default_aligned(array, size) == size); +} + +struct some_non_vectorizable_type { float x; }; + +void test_first_aligned() +{ + EIGEN_ALIGN16 float array_float[100]; + test_first_aligned_helper(array_float, 50); + test_first_aligned_helper(array_float+1, 50); + test_first_aligned_helper(array_float+2, 50); + test_first_aligned_helper(array_float+3, 50); + test_first_aligned_helper(array_float+4, 50); + test_first_aligned_helper(array_float+5, 50); + + EIGEN_ALIGN16 double array_double[100]; + test_first_aligned_helper(array_double, 50); + test_first_aligned_helper(array_double+1, 50); + test_first_aligned_helper(array_double+2, 50); + + double *array_double_plus_4_bytes = (double*)(internal::UIntPtr(array_double)+4); + test_none_aligned_helper(array_double_plus_4_bytes, 50); + test_none_aligned_helper(array_double_plus_4_bytes+1, 50); + + some_non_vectorizable_type array_nonvec[100]; + test_first_aligned_helper(array_nonvec, 100); + test_none_aligned_helper(array_nonvec, 100); +} diff --git a/thirdparty/eigen/test/geo_alignedbox.cpp b/thirdparty/eigen/test/geo_alignedbox.cpp new file mode 100644 index 000000000..d2339a651 --- /dev/null +++ b/thirdparty/eigen/test/geo_alignedbox.cpp @@ -0,0 +1,189 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +#include +using namespace std; + +template EIGEN_DONT_INLINE +void kill_extra_precision(T& x) { eigen_assert((void*)(&x) != (void*)0); } + + +template void alignedbox(const BoxType& _box) +{ + /* this test covers the following files: + AlignedBox.h + */ + typedef typename BoxType::Index Index; + typedef typename BoxType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + + const Index dim = _box.dim(); + + VectorType p0 = VectorType::Random(dim); + VectorType p1 = VectorType::Random(dim); + while( p1 == p0 ){ + p1 = VectorType::Random(dim); } + RealScalar s1 = internal::random(0,1); + + BoxType b0(dim); + BoxType b1(VectorType::Random(dim),VectorType::Random(dim)); + BoxType b2; + + kill_extra_precision(b1); + kill_extra_precision(p0); + kill_extra_precision(p1); + + b0.extend(p0); + b0.extend(p1); + VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); + VERIFY(b0.contains(b0.center())); + VERIFY_IS_APPROX(b0.center(),(p0+p1)/Scalar(2)); + + (b2 = b0).extend(b1); + VERIFY(b2.contains(b0)); + VERIFY(b2.contains(b1)); + VERIFY_IS_APPROX(b2.clamp(b0), b0); + + // intersection + BoxType box1(VectorType::Random(dim)); + box1.extend(VectorType::Random(dim)); + BoxType box2(VectorType::Random(dim)); + box2.extend(VectorType::Random(dim)); + + VERIFY(box1.intersects(box2) == !box1.intersection(box2).isEmpty()); + + // alignment -- make sure there is no memory alignment assertion + BoxType *bp0 = new BoxType(dim); + BoxType *bp1 = new BoxType(dim); + bp0->extend(*bp1); + delete bp0; + delete bp1; + + // sampling + for( int i=0; i<10; ++i ) + { + VectorType r = b0.sample(); + 
VERIFY(b0.contains(r)); + } + +} + + + +template +void alignedboxCastTests(const BoxType& _box) +{ + // casting + typedef typename BoxType::Index Index; + typedef typename BoxType::Scalar Scalar; + typedef Matrix VectorType; + + const Index dim = _box.dim(); + + VectorType p0 = VectorType::Random(dim); + VectorType p1 = VectorType::Random(dim); + + BoxType b0(dim); + + b0.extend(p0); + b0.extend(p1); + + const int Dim = BoxType::AmbientDimAtCompileTime; + typedef typename GetDifferentType::type OtherScalar; + AlignedBox hp1f = b0.template cast(); + VERIFY_IS_APPROX(hp1f.template cast(),b0); + AlignedBox hp1d = b0.template cast(); + VERIFY_IS_APPROX(hp1d.template cast(),b0); +} + + +void specificTest1() +{ + Vector2f m; m << -1.0f, -2.0f; + Vector2f M; M << 1.0f, 5.0f; + + typedef AlignedBox2f BoxType; + BoxType box( m, M ); + + Vector2f sides = M-m; + VERIFY_IS_APPROX(sides, box.sizes() ); + VERIFY_IS_APPROX(sides[1], box.sizes()[1] ); + VERIFY_IS_APPROX(sides[1], box.sizes().maxCoeff() ); + VERIFY_IS_APPROX(sides[0], box.sizes().minCoeff() ); + + VERIFY_IS_APPROX( 14.0f, box.volume() ); + VERIFY_IS_APPROX( 53.0f, box.diagonal().squaredNorm() ); + VERIFY_IS_APPROX( std::sqrt( 53.0f ), box.diagonal().norm() ); + + VERIFY_IS_APPROX( m, box.corner( BoxType::BottomLeft ) ); + VERIFY_IS_APPROX( M, box.corner( BoxType::TopRight ) ); + Vector2f bottomRight; bottomRight << M[0], m[1]; + Vector2f topLeft; topLeft << m[0], M[1]; + VERIFY_IS_APPROX( bottomRight, box.corner( BoxType::BottomRight ) ); + VERIFY_IS_APPROX( topLeft, box.corner( BoxType::TopLeft ) ); +} + + +void specificTest2() +{ + Vector3i m; m << -1, -2, 0; + Vector3i M; M << 1, 5, 3; + + typedef AlignedBox3i BoxType; + BoxType box( m, M ); + + Vector3i sides = M-m; + VERIFY_IS_APPROX(sides, box.sizes() ); + VERIFY_IS_APPROX(sides[1], box.sizes()[1] ); + VERIFY_IS_APPROX(sides[1], box.sizes().maxCoeff() ); + VERIFY_IS_APPROX(sides[0], box.sizes().minCoeff() ); + + VERIFY_IS_APPROX( 42, box.volume() ); + 
VERIFY_IS_APPROX( 62, box.diagonal().squaredNorm() ); + + VERIFY_IS_APPROX( m, box.corner( BoxType::BottomLeftFloor ) ); + VERIFY_IS_APPROX( M, box.corner( BoxType::TopRightCeil ) ); + Vector3i bottomRightFloor; bottomRightFloor << M[0], m[1], m[2]; + Vector3i topLeftFloor; topLeftFloor << m[0], M[1], m[2]; + VERIFY_IS_APPROX( bottomRightFloor, box.corner( BoxType::BottomRightFloor ) ); + VERIFY_IS_APPROX( topLeftFloor, box.corner( BoxType::TopLeftFloor ) ); +} + + +void test_geo_alignedbox() +{ + for(int i = 0; i < g_repeat; i++) + { + CALL_SUBTEST_1( alignedbox(AlignedBox2f()) ); + CALL_SUBTEST_2( alignedboxCastTests(AlignedBox2f()) ); + + CALL_SUBTEST_3( alignedbox(AlignedBox3f()) ); + CALL_SUBTEST_4( alignedboxCastTests(AlignedBox3f()) ); + + CALL_SUBTEST_5( alignedbox(AlignedBox4d()) ); + CALL_SUBTEST_6( alignedboxCastTests(AlignedBox4d()) ); + + CALL_SUBTEST_7( alignedbox(AlignedBox1d()) ); + CALL_SUBTEST_8( alignedboxCastTests(AlignedBox1d()) ); + + CALL_SUBTEST_9( alignedbox(AlignedBox1i()) ); + CALL_SUBTEST_10( alignedbox(AlignedBox2i()) ); + CALL_SUBTEST_11( alignedbox(AlignedBox3i()) ); + + CALL_SUBTEST_14( alignedbox(AlignedBox(4)) ); + } + CALL_SUBTEST_12( specificTest1() ); + CALL_SUBTEST_13( specificTest2() ); +} diff --git a/thirdparty/eigen/test/geo_eulerangles.cpp b/thirdparty/eigen/test/geo_eulerangles.cpp new file mode 100644 index 000000000..932ebe773 --- /dev/null +++ b/thirdparty/eigen/test/geo_eulerangles.cpp @@ -0,0 +1,112 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include +#include + + +template +void verify_euler(const Matrix& ea, int i, int j, int k) +{ + typedef Matrix Matrix3; + typedef Matrix Vector3; + typedef AngleAxis AngleAxisx; + using std::abs; + Matrix3 m(AngleAxisx(ea[0], Vector3::Unit(i)) * AngleAxisx(ea[1], Vector3::Unit(j)) * AngleAxisx(ea[2], Vector3::Unit(k))); + Vector3 eabis = m.eulerAngles(i, j, k); + Matrix3 mbis(AngleAxisx(eabis[0], Vector3::Unit(i)) * AngleAxisx(eabis[1], Vector3::Unit(j)) * AngleAxisx(eabis[2], Vector3::Unit(k))); + VERIFY_IS_APPROX(m, mbis); + /* If I==K, and ea[1]==0, then there no unique solution. */ + /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ + if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision())) ) + VERIFY((ea-eabis).norm() <= test_precision()); + + // approx_or_less_than does not work for 0 + VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1))); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[1]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[2]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(EIGEN_PI)); +} + +template void check_all_var(const Matrix& ea) +{ + verify_euler(ea, 0,1,2); + verify_euler(ea, 0,1,0); + verify_euler(ea, 0,2,1); + verify_euler(ea, 0,2,0); + + verify_euler(ea, 1,2,0); + verify_euler(ea, 1,2,1); + verify_euler(ea, 1,0,2); + verify_euler(ea, 1,0,1); + + verify_euler(ea, 2,0,1); + verify_euler(ea, 2,0,2); + verify_euler(ea, 2,1,0); + verify_euler(ea, 2,1,2); +} + +template void eulerangles() +{ + typedef Matrix Matrix3; + typedef Matrix Vector3; + typedef Array Array3; + typedef Quaternion Quaternionx; + typedef AngleAxis AngleAxisx; + + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Quaternionx q1; + q1 = AngleAxisx(a, Vector3::Random().normalized()); + Matrix3 
m; + m = q1; + + Vector3 ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with purely random Quaternion: + q1.coeffs() = Quaternionx::Coefficients::Random().normalized(); + m = q1; + ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi]. + ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1); + check_all_var(ea); + + ea[2] = ea[0] = internal::random(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[0] = ea[1] = internal::random(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[1] = 0; + check_all_var(ea); + + ea.head(2).setZero(); + check_all_var(ea); + + ea.setZero(); + check_all_var(ea); +} + +void test_geo_eulerangles() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( eulerangles() ); + CALL_SUBTEST_2( eulerangles() ); + } +} diff --git a/thirdparty/eigen/test/geo_homogeneous.cpp b/thirdparty/eigen/test/geo_homogeneous.cpp new file mode 100644 index 000000000..2187c7bf9 --- /dev/null +++ b/thirdparty/eigen/test/geo_homogeneous.cpp @@ -0,0 +1,125 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +template void homogeneous(void) +{ + /* this test covers the following files: + Homogeneous.h + */ + + typedef Matrix MatrixType; + typedef Matrix VectorType; + + typedef Matrix HMatrixType; + typedef Matrix HVectorType; + + typedef Matrix T1MatrixType; + typedef Matrix T2MatrixType; + typedef Matrix T3MatrixType; + + VectorType v0 = VectorType::Random(), + ones = VectorType::Ones(); + + HVectorType hv0 = HVectorType::Random(); + + MatrixType m0 = MatrixType::Random(); + + HMatrixType hm0 = HMatrixType::Random(); + + hv0 << v0, 1; + VERIFY_IS_APPROX(v0.homogeneous(), hv0); + VERIFY_IS_APPROX(v0, hv0.hnormalized()); + + VERIFY_IS_APPROX(v0.homogeneous().sum(), hv0.sum()); + VERIFY_IS_APPROX(v0.homogeneous().minCoeff(), hv0.minCoeff()); + VERIFY_IS_APPROX(v0.homogeneous().maxCoeff(), hv0.maxCoeff()); + + hm0 << m0, ones.transpose(); + VERIFY_IS_APPROX(m0.colwise().homogeneous(), hm0); + VERIFY_IS_APPROX(m0, hm0.colwise().hnormalized()); + hm0.row(Size-1).setRandom(); + for(int j=0; j aff; + Transform caff; + Transform proj; + Matrix pts; + Matrix pts1, pts2; + + aff.affine().setRandom(); + proj = caff = aff; + pts.setRandom(Size,internal::random(1,20)); + + pts1 = pts.colwise().homogeneous(); + VERIFY_IS_APPROX(aff * pts.colwise().homogeneous(), (aff * pts1).colwise().hnormalized()); + VERIFY_IS_APPROX(caff * pts.colwise().homogeneous(), (caff * pts1).colwise().hnormalized()); + VERIFY_IS_APPROX(proj * pts.colwise().homogeneous(), (proj * pts1)); + + VERIFY_IS_APPROX((aff * pts1).colwise().hnormalized(), aff * pts); + VERIFY_IS_APPROX((caff * pts1).colwise().hnormalized(), caff * pts); + + pts2 = pts1; + pts2.row(Size).setRandom(); + VERIFY_IS_APPROX((aff * pts2).colwise().hnormalized(), aff * pts2.colwise().hnormalized()); + VERIFY_IS_APPROX((caff * pts2).colwise().hnormalized(), caff * pts2.colwise().hnormalized()); + VERIFY_IS_APPROX((proj * pts2).colwise().hnormalized(), (proj * 
pts2.colwise().hnormalized().colwise().homogeneous()).colwise().hnormalized()); + + // Test combination of homogeneous + + VERIFY_IS_APPROX( (t2 * v0.homogeneous()).hnormalized(), + (t2.template topLeftCorner() * v0 + t2.template topRightCorner()) + / ((t2.template bottomLeftCorner<1,Size>()*v0).value() + t2(Size,Size)) ); + + VERIFY_IS_APPROX( (t2 * pts.colwise().homogeneous()).colwise().hnormalized(), + (Matrix(t2 * pts1).colwise().hnormalized()) ); + + VERIFY_IS_APPROX( (t2 .lazyProduct( v0.homogeneous() )).hnormalized(), (t2 * v0.homogeneous()).hnormalized() ); + VERIFY_IS_APPROX( (t2 .lazyProduct ( pts.colwise().homogeneous() )).colwise().hnormalized(), (t2 * pts1).colwise().hnormalized() ); + + VERIFY_IS_APPROX( (v0.transpose().homogeneous() .lazyProduct( t2 )).hnormalized(), (v0.transpose().homogeneous()*t2).hnormalized() ); + VERIFY_IS_APPROX( (pts.transpose().rowwise().homogeneous() .lazyProduct( t2 )).rowwise().hnormalized(), (pts1.transpose()*t2).rowwise().hnormalized() ); + + VERIFY_IS_APPROX( (t2.template triangularView() * v0.homogeneous()).eval(), (t2.template triangularView()*hv0) ); +} + +void test_geo_homogeneous() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( homogeneous() )); + CALL_SUBTEST_2(( homogeneous() )); + CALL_SUBTEST_3(( homogeneous() )); + } +} diff --git a/thirdparty/eigen/test/geo_hyperplane.cpp b/thirdparty/eigen/test/geo_hyperplane.cpp new file mode 100644 index 000000000..e77702bc7 --- /dev/null +++ b/thirdparty/eigen/test/geo_hyperplane.cpp @@ -0,0 +1,195 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include +#include + +template void hyperplane(const HyperplaneType& _plane) +{ + /* this test covers the following files: + Hyperplane.h + */ + using std::abs; + typedef typename HyperplaneType::Index Index; + const Index dim = _plane.dim(); + enum { Options = HyperplaneType::Options }; + typedef typename HyperplaneType::Scalar Scalar; + typedef typename HyperplaneType::RealScalar RealScalar; + typedef Matrix VectorType; + typedef Matrix MatrixType; + + VectorType p0 = VectorType::Random(dim); + VectorType p1 = VectorType::Random(dim); + + VectorType n0 = VectorType::Random(dim).normalized(); + VectorType n1 = VectorType::Random(dim).normalized(); + + HyperplaneType pl0(n0, p0); + HyperplaneType pl1(n1, p1); + HyperplaneType pl2 = pl1; + + Scalar s0 = internal::random(); + Scalar s1 = internal::random(); + + VERIFY_IS_APPROX( n1.dot(n1), Scalar(1) ); + + VERIFY_IS_MUCH_SMALLER_THAN( pl0.absDistance(p0), Scalar(1) ); + if(numext::abs2(s0)>RealScalar(1e-6)) + VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0); + else + VERIFY_IS_MUCH_SMALLER_THAN( abs(pl1.signedDistance(p1 + n1 * s0) - s0), Scalar(1) ); + VERIFY_IS_MUCH_SMALLER_THAN( pl1.signedDistance(pl1.projection(p0)), Scalar(1) ); + VERIFY_IS_MUCH_SMALLER_THAN( pl1.absDistance(p1 + pl1.normal().unitOrthogonal() * s1), Scalar(1) ); + + // transform + if (!NumTraits::IsComplex) + { + MatrixType rot = MatrixType::Random(dim,dim).householderQr().householderQ(); + DiagonalMatrix scaling(VectorType::Random()); + Translation translation(VectorType::Random()); + + while(scaling.diagonal().cwiseAbs().minCoeff()::type OtherScalar; + Hyperplane hp1f = pl1.template cast(); + VERIFY_IS_APPROX(hp1f.template cast(),pl1); + Hyperplane hp1d = pl1.template cast(); + VERIFY_IS_APPROX(hp1d.template cast(),pl1); +} + +template void lines() +{ + using std::abs; + typedef Hyperplane HLine; + typedef ParametrizedLine PLine; + typedef Matrix Vector; + typedef Matrix CoeffsType; + + for(int i = 0; i 
< 10; i++) + { + Vector center = Vector::Random(); + Vector u = Vector::Random(); + Vector v = Vector::Random(); + Scalar a = internal::random(); + while (abs(a-1) < Scalar(1e-4)) a = internal::random(); + while (u.norm() < Scalar(1e-4)) u = Vector::Random(); + while (v.norm() < Scalar(1e-4)) v = Vector::Random(); + + HLine line_u = HLine::Through(center + u, center + a*u); + HLine line_v = HLine::Through(center + v, center + a*v); + + // the line equations should be normalized so that a^2+b^2=1 + VERIFY_IS_APPROX(line_u.normal().norm(), Scalar(1)); + VERIFY_IS_APPROX(line_v.normal().norm(), Scalar(1)); + + Vector result = line_u.intersection(line_v); + + // the lines should intersect at the point we called "center" + if(abs(a-1) > Scalar(1e-2) && abs(v.normalized().dot(u.normalized())) void planes() +{ + using std::abs; + typedef Hyperplane Plane; + typedef Matrix Vector; + + for(int i = 0; i < 10; i++) + { + Vector v0 = Vector::Random(); + Vector v1(v0), v2(v0); + if(internal::random(0,1)>0.25) + v1 += Vector::Random(); + if(internal::random(0,1)>0.25) + v2 += v1 * std::pow(internal::random(0,1),internal::random(1,16)); + if(internal::random(0,1)>0.25) + v2 += Vector::Random() * std::pow(internal::random(0,1),internal::random(1,16)); + + Plane p0 = Plane::Through(v0, v1, v2); + + VERIFY_IS_APPROX(p0.normal().norm(), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(p0.absDistance(v0), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(p0.absDistance(v1), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(p0.absDistance(v2), Scalar(1)); + } +} + +template void hyperplane_alignment() +{ + typedef Hyperplane Plane3a; + typedef Hyperplane Plane3u; + + EIGEN_ALIGN_MAX Scalar array1[4]; + EIGEN_ALIGN_MAX Scalar array2[4]; + EIGEN_ALIGN_MAX Scalar array3[4+1]; + Scalar* array3u = array3+1; + + Plane3a *p1 = ::new(reinterpret_cast(array1)) Plane3a; + Plane3u *p2 = ::new(reinterpret_cast(array2)) Plane3u; + Plane3u *p3 = ::new(reinterpret_cast(array3u)) Plane3u; + + p1->coeffs().setRandom(); + *p2 
= *p1; + *p3 = *p1; + + VERIFY_IS_APPROX(p1->coeffs(), p2->coeffs()); + VERIFY_IS_APPROX(p1->coeffs(), p3->coeffs()); + + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES > 0 + if(internal::packet_traits::Vectorizable && internal::packet_traits::size<=4) + VERIFY_RAISES_ASSERT((::new(reinterpret_cast(array3u)) Plane3a)); + #endif +} + + +void test_geo_hyperplane() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( hyperplane(Hyperplane()) ); + CALL_SUBTEST_2( hyperplane(Hyperplane()) ); + CALL_SUBTEST_2( hyperplane(Hyperplane()) ); + CALL_SUBTEST_2( hyperplane_alignment() ); + CALL_SUBTEST_3( hyperplane(Hyperplane()) ); + CALL_SUBTEST_4( hyperplane(Hyperplane,5>()) ); + CALL_SUBTEST_1( lines() ); + CALL_SUBTEST_3( lines() ); + CALL_SUBTEST_2( planes() ); + CALL_SUBTEST_5( planes() ); + } +} diff --git a/thirdparty/eigen/test/geo_orthomethods.cpp b/thirdparty/eigen/test/geo_orthomethods.cpp new file mode 100644 index 000000000..e178df257 --- /dev/null +++ b/thirdparty/eigen/test/geo_orthomethods.cpp @@ -0,0 +1,133 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include +#include + +/* this test covers the following files: + Geometry/OrthoMethods.h +*/ + +template void orthomethods_3() +{ + typedef typename NumTraits::Real RealScalar; + typedef Matrix Matrix3; + typedef Matrix Vector3; + + typedef Matrix Vector4; + + Vector3 v0 = Vector3::Random(), + v1 = Vector3::Random(), + v2 = Vector3::Random(); + + // cross product + VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).dot(v1), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(v1.dot(v1.cross(v2)), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).dot(v2), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(v2.dot(v1.cross(v2)), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(Vector3::Random()).dot(v1), Scalar(1)); + Matrix3 mat3; + mat3 << v0.normalized(), + (v0.cross(v1)).normalized(), + (v0.cross(v1).cross(v0)).normalized(); + VERIFY(mat3.isUnitary()); + + mat3.setRandom(); + VERIFY_IS_APPROX(v0.cross(mat3*v1), -(mat3*v1).cross(v0)); + VERIFY_IS_APPROX(v0.cross(mat3.lazyProduct(v1)), -(mat3.lazyProduct(v1)).cross(v0)); + + // colwise/rowwise cross product + mat3.setRandom(); + Vector3 vec3 = Vector3::Random(); + Matrix3 mcross; + int i = internal::random(0,2); + mcross = mat3.colwise().cross(vec3); + VERIFY_IS_APPROX(mcross.col(i), mat3.col(i).cross(vec3)); + + VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(vec3)).diagonal().cwiseAbs().sum(), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(Vector3::Random())).diagonal().cwiseAbs().sum(), Scalar(1)); + + VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * mat3.colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * Matrix3::Random().colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1)); + + mcross = mat3.rowwise().cross(vec3); + VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3)); + + // cross3 + Vector4 v40 = Vector4::Random(), + v41 = Vector4::Random(), + v42 = Vector4::Random(); + v40.w() = v41.w() = v42.w() 
= 0; + v42.template head<3>() = v40.template head<3>().cross(v41.template head<3>()); + VERIFY_IS_APPROX(v40.cross3(v41), v42); + VERIFY_IS_MUCH_SMALLER_THAN(v40.cross3(Vector4::Random()).dot(v40), Scalar(1)); + + // check mixed product + typedef Matrix RealVector3; + RealVector3 rv1 = RealVector3::Random(); + VERIFY_IS_APPROX(v1.cross(rv1.template cast()), v1.cross(rv1)); + VERIFY_IS_APPROX(rv1.template cast().cross(v1), rv1.cross(v1)); +} + +template void orthomethods(int size=Size) +{ + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + typedef Matrix Matrix3N; + typedef Matrix MatrixN3; + typedef Matrix Vector3; + + VectorType v0 = VectorType::Random(size); + + // unitOrthogonal + VERIFY_IS_MUCH_SMALLER_THAN(v0.unitOrthogonal().dot(v0), Scalar(1)); + VERIFY_IS_APPROX(v0.unitOrthogonal().norm(), RealScalar(1)); + + if (size>=3) + { + v0.template head<2>().setZero(); + v0.tail(size-2).setRandom(); + + VERIFY_IS_MUCH_SMALLER_THAN(v0.unitOrthogonal().dot(v0), Scalar(1)); + VERIFY_IS_APPROX(v0.unitOrthogonal().norm(), RealScalar(1)); + } + + // colwise/rowwise cross product + Vector3 vec3 = Vector3::Random(); + int i = internal::random(0,size-1); + + Matrix3N mat3N(3,size), mcross3N(3,size); + mat3N.setRandom(); + mcross3N = mat3N.colwise().cross(vec3); + VERIFY_IS_APPROX(mcross3N.col(i), mat3N.col(i).cross(vec3)); + + MatrixN3 matN3(size,3), mcrossN3(size,3); + matN3.setRandom(); + mcrossN3 = matN3.rowwise().cross(vec3); + VERIFY_IS_APPROX(mcrossN3.row(i), matN3.row(i).cross(vec3)); +} + +void test_geo_orthomethods() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( orthomethods_3() ); + CALL_SUBTEST_2( orthomethods_3() ); + CALL_SUBTEST_4( orthomethods_3 >() ); + CALL_SUBTEST_1( (orthomethods()) ); + CALL_SUBTEST_2( (orthomethods()) ); + CALL_SUBTEST_1( (orthomethods()) ); + CALL_SUBTEST_2( (orthomethods()) ); + CALL_SUBTEST_3( (orthomethods()) ); + CALL_SUBTEST_4( (orthomethods,8>()) ); + CALL_SUBTEST_5( (orthomethods(36)) ); + 
CALL_SUBTEST_6( (orthomethods(35)) ); + } +} diff --git a/thirdparty/eigen/test/geo_parametrizedline.cpp b/thirdparty/eigen/test/geo_parametrizedline.cpp new file mode 100644 index 000000000..9bf5f3c1d --- /dev/null +++ b/thirdparty/eigen/test/geo_parametrizedline.cpp @@ -0,0 +1,104 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +template void parametrizedline(const LineType& _line) +{ + /* this test covers the following files: + ParametrizedLine.h + */ + using std::abs; + typedef typename LineType::Index Index; + const Index dim = _line.dim(); + typedef typename LineType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + typedef Hyperplane HyperplaneType; + + VectorType p0 = VectorType::Random(dim); + VectorType p1 = VectorType::Random(dim); + + VectorType d0 = VectorType::Random(dim).normalized(); + + LineType l0(p0, d0); + + Scalar s0 = internal::random(); + Scalar s1 = abs(internal::random()); + + VERIFY_IS_MUCH_SMALLER_THAN( l0.distance(p0), RealScalar(1) ); + VERIFY_IS_MUCH_SMALLER_THAN( l0.distance(p0+s0*d0), RealScalar(1) ); + VERIFY_IS_APPROX( (l0.projection(p1)-p1).norm(), l0.distance(p1) ); + VERIFY_IS_MUCH_SMALLER_THAN( l0.distance(l0.projection(p1)), RealScalar(1) ); + VERIFY_IS_APPROX( Scalar(l0.distance((p0+s0*d0) + d0.unitOrthogonal() * s1)), s1 ); + + // casting + const int Dim = LineType::AmbientDimAtCompileTime; + typedef typename GetDifferentType::type OtherScalar; + ParametrizedLine hp1f = l0.template cast(); + VERIFY_IS_APPROX(hp1f.template cast(),l0); + ParametrizedLine hp1d = l0.template cast(); + 
VERIFY_IS_APPROX(hp1d.template cast(),l0); + + // intersections + VectorType p2 = VectorType::Random(dim); + VectorType n2 = VectorType::Random(dim).normalized(); + HyperplaneType hp(p2,n2); + Scalar t = l0.intersectionParameter(hp); + VectorType pi = l0.pointAt(t); + VERIFY_IS_MUCH_SMALLER_THAN(hp.signedDistance(pi), RealScalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(l0.distance(pi), RealScalar(1)); + VERIFY_IS_APPROX(l0.intersectionPoint(hp), pi); +} + +template void parametrizedline_alignment() +{ + typedef ParametrizedLine Line4a; + typedef ParametrizedLine Line4u; + + EIGEN_ALIGN_MAX Scalar array1[16]; + EIGEN_ALIGN_MAX Scalar array2[16]; + EIGEN_ALIGN_MAX Scalar array3[16+1]; + Scalar* array3u = array3+1; + + Line4a *p1 = ::new(reinterpret_cast(array1)) Line4a; + Line4u *p2 = ::new(reinterpret_cast(array2)) Line4u; + Line4u *p3 = ::new(reinterpret_cast(array3u)) Line4u; + + p1->origin().setRandom(); + p1->direction().setRandom(); + *p2 = *p1; + *p3 = *p1; + + VERIFY_IS_APPROX(p1->origin(), p2->origin()); + VERIFY_IS_APPROX(p1->origin(), p3->origin()); + VERIFY_IS_APPROX(p1->direction(), p2->direction()); + VERIFY_IS_APPROX(p1->direction(), p3->direction()); + + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(internal::packet_traits::Vectorizable && internal::packet_traits::size<=4) + VERIFY_RAISES_ASSERT((::new(reinterpret_cast(array3u)) Line4a)); + #endif +} + +void test_geo_parametrizedline() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( parametrizedline(ParametrizedLine()) ); + CALL_SUBTEST_2( parametrizedline(ParametrizedLine()) ); + CALL_SUBTEST_2( parametrizedline_alignment() ); + CALL_SUBTEST_3( parametrizedline(ParametrizedLine()) ); + CALL_SUBTEST_3( parametrizedline_alignment() ); + CALL_SUBTEST_4( parametrizedline(ParametrizedLine,5>()) ); + } +} diff --git a/thirdparty/eigen/test/geo_quaternion.cpp b/thirdparty/eigen/test/geo_quaternion.cpp new file mode 100644 index 000000000..96889e722 --- /dev/null +++ 
b/thirdparty/eigen/test/geo_quaternion.cpp @@ -0,0 +1,289 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2009 Mathieu Gautier +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +template T bounded_acos(T v) +{ + using std::acos; + using std::min; + using std::max; + return acos((max)(T(-1),(min)(v,T(1)))); +} + +template void check_slerp(const QuatType& q0, const QuatType& q1) +{ + using std::abs; + typedef typename QuatType::Scalar Scalar; + typedef AngleAxis AA; + + Scalar largeEps = test_precision(); + + Scalar theta_tot = AA(q1*q0.inverse()).angle(); + if(theta_tot>Scalar(EIGEN_PI)) + theta_tot = Scalar(2.)*Scalar(EIGEN_PI)-theta_tot; + for(Scalar t=0; t<=Scalar(1.001); t+=Scalar(0.1)) + { + QuatType q = q0.slerp(t,q1); + Scalar theta = AA(q*q0.inverse()).angle(); + VERIFY(abs(q.norm() - 1) < largeEps); + if(theta_tot==0) VERIFY(theta_tot==0); + else VERIFY(abs(theta - t * theta_tot) < largeEps); + } +} + +template void quaternion(void) +{ + /* this test covers the following files: + Quaternion.h + */ + using std::abs; + typedef Matrix Vector3; + typedef Matrix Matrix3; + typedef Quaternion Quaternionx; + typedef AngleAxis AngleAxisx; + + Scalar largeEps = test_precision(); + if (internal::is_same::value) + largeEps = Scalar(1e-3); + + Scalar eps = internal::random() * Scalar(1e-2); + + Vector3 v0 = Vector3::Random(), + v1 = Vector3::Random(), + v2 = Vector3::Random(), + v3 = Vector3::Random(); + + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)), + b = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + + // Quaternion: Identity(), setIdentity(); + Quaternionx q1, q2; + q2.setIdentity(); + 
VERIFY_IS_APPROX(Quaternionx(Quaternionx::Identity()).coeffs(), q2.coeffs()); + q1.coeffs().setRandom(); + VERIFY_IS_APPROX(q1.coeffs(), (q1*q2).coeffs()); + + // concatenation + q1 *= q2; + + q1 = AngleAxisx(a, v0.normalized()); + q2 = AngleAxisx(a, v1.normalized()); + + // angular distance + Scalar refangle = abs(AngleAxisx(q1.inverse()*q2).angle()); + if (refangle>Scalar(EIGEN_PI)) + refangle = Scalar(2)*Scalar(EIGEN_PI) - refangle; + + if((q1.coeffs()-q2.coeffs()).norm() > 10*largeEps) + { + VERIFY_IS_MUCH_SMALLER_THAN(abs(q1.angularDistance(q2) - refangle), Scalar(1)); + } + + // rotation matrix conversion + VERIFY_IS_APPROX(q1 * v2, q1.toRotationMatrix() * v2); + VERIFY_IS_APPROX(q1 * q2 * v2, + q1.toRotationMatrix() * q2.toRotationMatrix() * v2); + + VERIFY( (q2*q1).isApprox(q1*q2, largeEps) + || !(q2 * q1 * v2).isApprox(q1.toRotationMatrix() * q2.toRotationMatrix() * v2)); + + q2 = q1.toRotationMatrix(); + VERIFY_IS_APPROX(q1*v1,q2*v1); + + Matrix3 rot1(q1); + VERIFY_IS_APPROX(q1*v1,rot1*v1); + Quaternionx q3(rot1.transpose()*rot1); + VERIFY_IS_APPROX(q3*v1,v1); + + + // angle-axis conversion + AngleAxisx aa = AngleAxisx(q1); + VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); + + // Do not execute the test if the rotation angle is almost zero, or + // the rotation axis and v1 are almost parallel. 
+ if (abs(aa.angle()) > 5*test_precision() + && (aa.axis() - v1.normalized()).norm() < Scalar(1.99) + && (aa.axis() + v1.normalized()).norm() < Scalar(1.99)) + { + VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1); + } + + // from two vector creation + VERIFY_IS_APPROX( v2.normalized(),(q2.setFromTwoVectors(v1, v2)*v1).normalized()); + VERIFY_IS_APPROX( v1.normalized(),(q2.setFromTwoVectors(v1, v1)*v1).normalized()); + VERIFY_IS_APPROX(-v1.normalized(),(q2.setFromTwoVectors(v1,-v1)*v1).normalized()); + if (internal::is_same::value) + { + v3 = (v1.array()+eps).matrix(); + VERIFY_IS_APPROX( v3.normalized(),(q2.setFromTwoVectors(v1, v3)*v1).normalized()); + VERIFY_IS_APPROX(-v3.normalized(),(q2.setFromTwoVectors(v1,-v3)*v1).normalized()); + } + + // from two vector creation static function + VERIFY_IS_APPROX( v2.normalized(),(Quaternionx::FromTwoVectors(v1, v2)*v1).normalized()); + VERIFY_IS_APPROX( v1.normalized(),(Quaternionx::FromTwoVectors(v1, v1)*v1).normalized()); + VERIFY_IS_APPROX(-v1.normalized(),(Quaternionx::FromTwoVectors(v1,-v1)*v1).normalized()); + if (internal::is_same::value) + { + v3 = (v1.array()+eps).matrix(); + VERIFY_IS_APPROX( v3.normalized(),(Quaternionx::FromTwoVectors(v1, v3)*v1).normalized()); + VERIFY_IS_APPROX(-v3.normalized(),(Quaternionx::FromTwoVectors(v1,-v3)*v1).normalized()); + } + + // inverse and conjugate + VERIFY_IS_APPROX(q1 * (q1.inverse() * v1), v1); + VERIFY_IS_APPROX(q1 * (q1.conjugate() * v1), v1); + + // test casting + Quaternion q1f = q1.template cast(); + VERIFY_IS_APPROX(q1f.template cast(),q1); + Quaternion q1d = q1.template cast(); + VERIFY_IS_APPROX(q1d.template cast(),q1); + + // test bug 369 - improper alignment. 
+ Quaternionx *q = new Quaternionx; + delete q; + + q1 = Quaternionx::UnitRandom(); + q2 = Quaternionx::UnitRandom(); + check_slerp(q1,q2); + + q1 = AngleAxisx(b, v1.normalized()); + q2 = AngleAxisx(b+Scalar(EIGEN_PI), v1.normalized()); + check_slerp(q1,q2); + + q1 = AngleAxisx(b, v1.normalized()); + q2 = AngleAxisx(-b, -v1.normalized()); + check_slerp(q1,q2); + + q1 = Quaternionx::UnitRandom(); + q2.coeffs() = -q1.coeffs(); + check_slerp(q1,q2); +} + +template void mapQuaternion(void){ + typedef Map, Aligned> MQuaternionA; + typedef Map, Aligned> MCQuaternionA; + typedef Map > MQuaternionUA; + typedef Map > MCQuaternionUA; + typedef Quaternion Quaternionx; + typedef Matrix Vector3; + typedef AngleAxis AngleAxisx; + + Vector3 v0 = Vector3::Random(), + v1 = Vector3::Random(); + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + + EIGEN_ALIGN_MAX Scalar array1[4]; + EIGEN_ALIGN_MAX Scalar array2[4]; + EIGEN_ALIGN_MAX Scalar array3[4+1]; + Scalar* array3unaligned = array3+1; + + MQuaternionA mq1(array1); + MCQuaternionA mcq1(array1); + MQuaternionA mq2(array2); + MQuaternionUA mq3(array3unaligned); + MCQuaternionUA mcq3(array3unaligned); + +// std::cerr << array1 << " " << array2 << " " << array3 << "\n"; + mq1 = AngleAxisx(a, v0.normalized()); + mq2 = mq1; + mq3 = mq1; + + Quaternionx q1 = mq1; + Quaternionx q2 = mq2; + Quaternionx q3 = mq3; + Quaternionx q4 = MCQuaternionUA(array3unaligned); + + VERIFY_IS_APPROX(q1.coeffs(), q2.coeffs()); + VERIFY_IS_APPROX(q1.coeffs(), q3.coeffs()); + VERIFY_IS_APPROX(q4.coeffs(), q3.coeffs()); + #ifdef EIGEN_VECTORIZE + if(internal::packet_traits::Vectorizable) + VERIFY_RAISES_ASSERT((MQuaternionA(array3unaligned))); + #endif + + VERIFY_IS_APPROX(mq1 * (mq1.inverse() * v1), v1); + VERIFY_IS_APPROX(mq1 * (mq1.conjugate() * v1), v1); + + VERIFY_IS_APPROX(mcq1 * (mcq1.inverse() * v1), v1); + VERIFY_IS_APPROX(mcq1 * (mcq1.conjugate() * v1), v1); + + VERIFY_IS_APPROX(mq3 * (mq3.inverse() * v1), v1); + 
VERIFY_IS_APPROX(mq3 * (mq3.conjugate() * v1), v1); + + VERIFY_IS_APPROX(mcq3 * (mcq3.inverse() * v1), v1); + VERIFY_IS_APPROX(mcq3 * (mcq3.conjugate() * v1), v1); + + VERIFY_IS_APPROX(mq1*mq2, q1*q2); + VERIFY_IS_APPROX(mq3*mq2, q3*q2); + VERIFY_IS_APPROX(mcq1*mq2, q1*q2); + VERIFY_IS_APPROX(mcq3*mq2, q3*q2); +} + +template void quaternionAlignment(void){ + typedef Quaternion QuaternionA; + typedef Quaternion QuaternionUA; + + EIGEN_ALIGN_MAX Scalar array1[4]; + EIGEN_ALIGN_MAX Scalar array2[4]; + EIGEN_ALIGN_MAX Scalar array3[4+1]; + Scalar* arrayunaligned = array3+1; + + QuaternionA *q1 = ::new(reinterpret_cast(array1)) QuaternionA; + QuaternionUA *q2 = ::new(reinterpret_cast(array2)) QuaternionUA; + QuaternionUA *q3 = ::new(reinterpret_cast(arrayunaligned)) QuaternionUA; + + q1->coeffs().setRandom(); + *q2 = *q1; + *q3 = *q1; + + VERIFY_IS_APPROX(q1->coeffs(), q2->coeffs()); + VERIFY_IS_APPROX(q1->coeffs(), q3->coeffs()); + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(internal::packet_traits::Vectorizable && internal::packet_traits::size<=4) + VERIFY_RAISES_ASSERT((::new(reinterpret_cast(arrayunaligned)) QuaternionA)); + #endif +} + +template void check_const_correctness(const PlainObjectType&) +{ + // there's a lot that we can't test here while still having this test compile! + // the only possible approach would be to run a script trying to compile stuff and checking that it fails. + // CMake can help with that. 
+ + // verify that map-to-const don't have LvalueBit + typedef typename internal::add_const::type ConstPlainObjectType; + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(Map::Flags & LvalueBit) ); + VERIFY( !(Map::Flags & LvalueBit) ); +} + +void test_geo_quaternion() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( quaternion() )); + CALL_SUBTEST_1( check_const_correctness(Quaternionf()) ); + CALL_SUBTEST_2(( quaternion() )); + CALL_SUBTEST_2( check_const_correctness(Quaterniond()) ); + CALL_SUBTEST_3(( quaternion() )); + CALL_SUBTEST_4(( quaternion() )); + CALL_SUBTEST_5(( quaternionAlignment() )); + CALL_SUBTEST_6(( quaternionAlignment() )); + CALL_SUBTEST_1( mapQuaternion() ); + CALL_SUBTEST_2( mapQuaternion() ); + } +} diff --git a/thirdparty/eigen/test/geo_transformations.cpp b/thirdparty/eigen/test/geo_transformations.cpp new file mode 100755 index 000000000..278e527c2 --- /dev/null +++ b/thirdparty/eigen/test/geo_transformations.cpp @@ -0,0 +1,645 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +template +Matrix angleToVec(T a) +{ + return Matrix(std::cos(a), std::sin(a)); +} + +// This permits to workaround a bug in clang/llvm code generation. 
+template +EIGEN_DONT_INLINE +void dont_over_optimize(T& x) { volatile typename T::Scalar tmp = x(0); x(0) = tmp; } + +template void non_projective_only() +{ + /* this test covers the following files: + Cross.h Quaternion.h, Transform.cpp + */ + typedef Matrix Vector3; + typedef Quaternion Quaternionx; + typedef AngleAxis AngleAxisx; + typedef Transform Transform3; + typedef DiagonalMatrix AlignedScaling3; + typedef Translation Translation3; + + Vector3 v0 = Vector3::Random(), + v1 = Vector3::Random(); + + Transform3 t0, t1, t2; + + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + + Quaternionx q1, q2; + + q1 = AngleAxisx(a, v0.normalized()); + + t0 = Transform3::Identity(); + VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); + + t0.linear() = q1.toRotationMatrix(); + + v0 << 50, 2, 1; + t0.scale(v0); + + VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).template head<3>().norm(), v0.x()); + + t0.setIdentity(); + t1.setIdentity(); + v1 << 1, 2, 3; + t0.linear() = q1.toRotationMatrix(); + t0.pretranslate(v0); + t0.scale(v1); + t1.linear() = q1.conjugate().toRotationMatrix(); + t1.prescale(v1.cwiseInverse()); + t1.translate(-v0); + + VERIFY((t0 * t1).matrix().isIdentity(test_precision())); + + t1.fromPositionOrientationScale(v0, q1, v1); + VERIFY_IS_APPROX(t1.matrix(), t0.matrix()); + VERIFY_IS_APPROX(t1*v1, t0*v1); + + // translation * vector + t0.setIdentity(); + t0.translate(v0); + VERIFY_IS_APPROX((t0 * v1).template head<3>(), Translation3(v0) * v1); + + // AlignedScaling * vector + t0.setIdentity(); + t0.scale(v0); + VERIFY_IS_APPROX((t0 * v1).template head<3>(), AlignedScaling3(v0) * v1); +} + +template void transformations() +{ + /* this test covers the following files: + Cross.h Quaternion.h, Transform.cpp + */ + using std::cos; + using std::abs; + typedef Matrix Matrix3; + typedef Matrix Matrix4; + typedef Matrix Vector2; + typedef Matrix Vector3; + typedef Matrix Vector4; + typedef Quaternion Quaternionx; + typedef AngleAxis 
AngleAxisx; + typedef Transform Transform2; + typedef Transform Transform3; + typedef typename Transform3::MatrixType MatrixType; + typedef DiagonalMatrix AlignedScaling3; + typedef Translation Translation2; + typedef Translation Translation3; + + Vector3 v0 = Vector3::Random(), + v1 = Vector3::Random(); + Matrix3 matrot1, m; + + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Scalar s0 = internal::random(), s1 = internal::random(); + + while(v0.norm() < test_precision()) v0 = Vector3::Random(); + while(v1.norm() < test_precision()) v1 = Vector3::Random(); + + VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0); + VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(EIGEN_PI), v0.unitOrthogonal()) * v0); + if(abs(cos(a)) > test_precision()) + { + VERIFY_IS_APPROX(cos(a)*v0.squaredNorm(), v0.dot(AngleAxisx(a, v0.unitOrthogonal()) * v0)); + } + m = AngleAxisx(a, v0.normalized()).toRotationMatrix().adjoint(); + VERIFY_IS_APPROX(Matrix3::Identity(), m * AngleAxisx(a, v0.normalized())); + VERIFY_IS_APPROX(Matrix3::Identity(), AngleAxisx(a, v0.normalized()) * m); + + Quaternionx q1, q2; + q1 = AngleAxisx(a, v0.normalized()); + q2 = AngleAxisx(a, v1.normalized()); + + // rotation matrix conversion + matrot1 = AngleAxisx(Scalar(0.1), Vector3::UnitX()) + * AngleAxisx(Scalar(0.2), Vector3::UnitY()) + * AngleAxisx(Scalar(0.3), Vector3::UnitZ()); + VERIFY_IS_APPROX(matrot1 * v1, + AngleAxisx(Scalar(0.1), Vector3(1,0,0)).toRotationMatrix() + * (AngleAxisx(Scalar(0.2), Vector3(0,1,0)).toRotationMatrix() + * (AngleAxisx(Scalar(0.3), Vector3(0,0,1)).toRotationMatrix() * v1))); + + // angle-axis conversion + AngleAxisx aa = AngleAxisx(q1); + VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); + + // The following test is stable only if 2*angle != angle and v1 is not colinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) + { + VERIFY( !(q1 * 
v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); + } + + aa.fromRotationMatrix(aa.toRotationMatrix()); + VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); + // The following test is stable only if 2*angle != angle and v1 is not colinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) + { + VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); + } + + // AngleAxis + VERIFY_IS_APPROX(AngleAxisx(a,v1.normalized()).toRotationMatrix(), + Quaternionx(AngleAxisx(a,v1.normalized())).toRotationMatrix()); + + AngleAxisx aa1; + m = q1.toRotationMatrix(); + aa1 = m; + VERIFY_IS_APPROX(AngleAxisx(m).toRotationMatrix(), + Quaternionx(m).toRotationMatrix()); + + // Transform + // TODO complete the tests ! + a = 0; + while (abs(a)(-Scalar(0.4)*Scalar(EIGEN_PI), Scalar(0.4)*Scalar(EIGEN_PI)); + q1 = AngleAxisx(a, v0.normalized()); + Transform3 t0, t1, t2; + + // first test setIdentity() and Identity() + t0.setIdentity(); + VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); + t0.matrix().setZero(); + t0 = Transform3::Identity(); + VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); + + t0.setIdentity(); + t1.setIdentity(); + v1 << 1, 2, 3; + t0.linear() = q1.toRotationMatrix(); + t0.pretranslate(v0); + t0.scale(v1); + t1.linear() = q1.conjugate().toRotationMatrix(); + t1.prescale(v1.cwiseInverse()); + t1.translate(-v0); + + VERIFY((t0 * t1).matrix().isIdentity(test_precision())); + + t1.fromPositionOrientationScale(v0, q1, v1); + VERIFY_IS_APPROX(t1.matrix(), t0.matrix()); + + t0.setIdentity(); t0.scale(v0).rotate(q1.toRotationMatrix()); + t1.setIdentity(); t1.scale(v0).rotate(q1); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0.setIdentity(); t0.scale(v0).rotate(AngleAxisx(q1)); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + VERIFY_IS_APPROX(t0.scale(a).matrix(), t1.scale(Vector3::Constant(a)).matrix()); + 
VERIFY_IS_APPROX(t0.prescale(a).matrix(), t1.prescale(Vector3::Constant(a)).matrix()); + + // More transform constructors, operator=, operator*= + + Matrix3 mat3 = Matrix3::Random(); + Matrix4 mat4; + mat4 << mat3 , Vector3::Zero() , Vector4::Zero().transpose(); + Transform3 tmat3(mat3), tmat4(mat4); + if(Mode!=int(AffineCompact)) + tmat4.matrix()(3,3) = Scalar(1); + VERIFY_IS_APPROX(tmat3.matrix(), tmat4.matrix()); + + Scalar a3 = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Vector3 v3 = Vector3::Random().normalized(); + AngleAxisx aa3(a3, v3); + Transform3 t3(aa3); + Transform3 t4; + t4 = aa3; + VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); + t4.rotate(AngleAxisx(-a3,v3)); + VERIFY_IS_APPROX(t4.matrix(), MatrixType::Identity()); + t4 *= aa3; + VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); + + do { + v3 = Vector3::Random(); + dont_over_optimize(v3); + } while (v3.cwiseAbs().minCoeff()::epsilon()); + Translation3 tv3(v3); + Transform3 t5(tv3); + t4 = tv3; + VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); + t4.translate((-v3).eval()); + VERIFY_IS_APPROX(t4.matrix(), MatrixType::Identity()); + t4 *= tv3; + VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); + + AlignedScaling3 sv3(v3); + Transform3 t6(sv3); + t4 = sv3; + VERIFY_IS_APPROX(t6.matrix(), t4.matrix()); + t4.scale(v3.cwiseInverse()); + VERIFY_IS_APPROX(t4.matrix(), MatrixType::Identity()); + t4 *= sv3; + VERIFY_IS_APPROX(t6.matrix(), t4.matrix()); + + // matrix * transform + VERIFY_IS_APPROX((t3.matrix()*t4).matrix(), (t3*t4).matrix()); + + // chained Transform product + VERIFY_IS_APPROX(((t3*t4)*t5).matrix(), (t3*(t4*t5)).matrix()); + + // check that Transform product doesn't have aliasing problems + t5 = t4; + t5 = t5*t5; + VERIFY_IS_APPROX(t5, t4*t4); + + // 2D transformation + Transform2 t20, t21; + Vector2 v20 = Vector2::Random(); + Vector2 v21 = Vector2::Random(); + for (int k=0; k<2; ++k) + if (abs(v21[k])(a).toRotationMatrix(); + VERIFY_IS_APPROX(t20.fromPositionOrientationScale(v20,a,v21).matrix(), + 
t21.pretranslate(v20).scale(v21).matrix()); + + t21.setIdentity(); + t21.linear() = Rotation2D(-a).toRotationMatrix(); + VERIFY( (t20.fromPositionOrientationScale(v20,a,v21) + * (t21.prescale(v21.cwiseInverse()).translate(-v20))).matrix().isIdentity(test_precision()) ); + + // Transform - new API + // 3D + t0.setIdentity(); + t0.rotate(q1).scale(v0).translate(v0); + // mat * aligned scaling and mat * translation + t1 = (Matrix3(q1) * AlignedScaling3(v0)) * Translation3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t1 = (Matrix3(q1) * Eigen::Scaling(v0)) * Translation3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t1 = (q1 * Eigen::Scaling(v0)) * Translation3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + // mat * transformation and aligned scaling * translation + t1 = Matrix3(q1) * (AlignedScaling3(v0) * Translation3(v0)); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + + t0.setIdentity(); + t0.scale(s0).translate(v0); + t1 = Eigen::Scaling(s0) * Translation3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t0.prescale(s0); + t1 = Eigen::Scaling(s0) * t1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0 = t3; + t0.scale(s0); + t1 = t3 * Eigen::Scaling(s0,s0,s0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t0.prescale(s0); + t1 = Eigen::Scaling(s0,s0,s0) * t1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0 = t3; + t0.scale(s0); + t1 = t3 * Eigen::Scaling(s0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t0.prescale(s0); + t1 = Eigen::Scaling(s0) * t1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0.setIdentity(); + t0.prerotate(q1).prescale(v0).pretranslate(v0); + // translation * aligned scaling and transformation * mat + t1 = (Translation3(v0) * AlignedScaling3(v0)) * Transform3(q1); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + // scaling * mat and translation * mat + t1 = Translation3(v0) * (AlignedScaling3(v0) * Transform3(q1)); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + t0.setIdentity(); + 
t0.scale(v0).translate(v0).rotate(q1); + // translation * mat and aligned scaling * transformation + t1 = AlignedScaling3(v0) * (Translation3(v0) * Transform3(q1)); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + // transformation * aligned scaling + t0.scale(v0); + t1 *= AlignedScaling3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + t1 = AlignedScaling3(v0) * (Translation3(v0) * Transform3(q1)); + t1 = t1 * v0.asDiagonal(); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + // transformation * translation + t0.translate(v0); + t1 = t1 * Translation3(v0); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + // translation * transformation + t0.pretranslate(v0); + t1 = Translation3(v0) * t1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // transform * quaternion + t0.rotate(q1); + t1 = t1 * q1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // translation * quaternion + t0.translate(v1).rotate(q1); + t1 = t1 * (Translation3(v1) * q1); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // aligned scaling * quaternion + t0.scale(v1).rotate(q1); + t1 = t1 * (AlignedScaling3(v1) * q1); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // quaternion * transform + t0.prerotate(q1); + t1 = q1 * t1; + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // quaternion * translation + t0.rotate(q1).translate(v1); + t1 = t1 * (q1 * Translation3(v1)); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // quaternion * aligned scaling + t0.rotate(q1).scale(v1); + t1 = t1 * (q1 * AlignedScaling3(v1)); + VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); + + // test transform inversion + t0.setIdentity(); + t0.translate(v0); + do { + t0.linear().setRandom(); + } while(t0.linear().jacobiSvd().singularValues()(2)()); + Matrix4 t044 = Matrix4::Zero(); + t044(3,3) = 1; + t044.block(0,0,t0.matrix().rows(),4) = t0.matrix(); + VERIFY_IS_APPROX(t0.inverse(Affine).matrix(), t044.inverse().block(0,0,t0.matrix().rows(),4)); + t0.setIdentity(); + t0.translate(v0).rotate(q1); + t044 = Matrix4::Zero(); + 
t044(3,3) = 1; + t044.block(0,0,t0.matrix().rows(),4) = t0.matrix(); + VERIFY_IS_APPROX(t0.inverse(Isometry).matrix(), t044.inverse().block(0,0,t0.matrix().rows(),4)); + + Matrix3 mat_rotation, mat_scaling; + t0.setIdentity(); + t0.translate(v0).rotate(q1).scale(v1); + t0.computeRotationScaling(&mat_rotation, &mat_scaling); + VERIFY_IS_APPROX(t0.linear(), mat_rotation * mat_scaling); + VERIFY_IS_APPROX(mat_rotation*mat_rotation.adjoint(), Matrix3::Identity()); + VERIFY_IS_APPROX(mat_rotation.determinant(), Scalar(1)); + t0.computeScalingRotation(&mat_scaling, &mat_rotation); + VERIFY_IS_APPROX(t0.linear(), mat_scaling * mat_rotation); + VERIFY_IS_APPROX(mat_rotation*mat_rotation.adjoint(), Matrix3::Identity()); + VERIFY_IS_APPROX(mat_rotation.determinant(), Scalar(1)); + + // test casting + Transform t1f = t1.template cast(); + VERIFY_IS_APPROX(t1f.template cast(),t1); + Transform t1d = t1.template cast(); + VERIFY_IS_APPROX(t1d.template cast(),t1); + + Translation3 tr1(v0); + Translation tr1f = tr1.template cast(); + VERIFY_IS_APPROX(tr1f.template cast(),tr1); + Translation tr1d = tr1.template cast(); + VERIFY_IS_APPROX(tr1d.template cast(),tr1); + + AngleAxis aa1f = aa1.template cast(); + VERIFY_IS_APPROX(aa1f.template cast(),aa1); + AngleAxis aa1d = aa1.template cast(); + VERIFY_IS_APPROX(aa1d.template cast(),aa1); + + Rotation2D r2d1(internal::random()); + Rotation2D r2d1f = r2d1.template cast(); + VERIFY_IS_APPROX(r2d1f.template cast(),r2d1); + Rotation2D r2d1d = r2d1.template cast(); + VERIFY_IS_APPROX(r2d1d.template cast(),r2d1); + + for(int k=0; k<100; ++k) + { + Scalar angle = internal::random(-100,100); + Rotation2D rot2(angle); + VERIFY( rot2.smallestPositiveAngle() >= 0 ); + VERIFY( rot2.smallestPositiveAngle() <= Scalar(2)*Scalar(EIGEN_PI) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestPositiveAngle()), angleToVec(rot2.angle()) ); + + VERIFY( rot2.smallestAngle() >= -Scalar(EIGEN_PI) ); + VERIFY( rot2.smallestAngle() <= Scalar(EIGEN_PI) ); + 
VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot2.angle()) ); + + Matrix rot2_as_mat(rot2); + Rotation2D rot3(rot2_as_mat); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot3.angle()) ); + } + + s0 = internal::random(-100,100); + s1 = internal::random(-100,100); + Rotation2D R0(s0), R1(s1); + + t20 = Translation2(v20) * (R0 * Eigen::Scaling(s0)); + t21 = Translation2(v20) * R0 * Eigen::Scaling(s0); + VERIFY_IS_APPROX(t20,t21); + + t20 = Translation2(v20) * (R0 * R0.inverse() * Eigen::Scaling(s0)); + t21 = Translation2(v20) * Eigen::Scaling(s0); + VERIFY_IS_APPROX(t20,t21); + + VERIFY_IS_APPROX(s0, (R0.slerp(0, R1)).angle()); + VERIFY_IS_APPROX( angleToVec(R1.smallestPositiveAngle()), angleToVec((R0.slerp(1, R1)).smallestPositiveAngle()) ); + VERIFY_IS_APPROX(R0.smallestPositiveAngle(), (R0.slerp(0.5, R0)).smallestPositiveAngle()); + + if(std::cos(s0)>0) + VERIFY_IS_MUCH_SMALLER_THAN((R0.slerp(0.5, R0.inverse())).smallestAngle(), Scalar(1)); + else + VERIFY_IS_APPROX(Scalar(EIGEN_PI), (R0.slerp(0.5, R0.inverse())).smallestPositiveAngle()); + + // Check path length + Scalar l = 0; + int path_steps = 100; + for(int k=0; k::epsilon()*Scalar(path_steps/2))); + + // check basic features + { + Rotation2D r1; // default ctor + r1 = Rotation2D(s0); // copy assignment + VERIFY_IS_APPROX(r1.angle(),s0); + Rotation2D r2(r1); // copy ctor + VERIFY_IS_APPROX(r2.angle(),s0); + } + + { + Transform3 t32(Matrix4::Random()), t33, t34; + t34 = t33 = t32; + t32.scale(v0); + t33*=AlignedScaling3(v0); + VERIFY_IS_APPROX(t32.matrix(), t33.matrix()); + t33 = t34 * AlignedScaling3(v0); + VERIFY_IS_APPROX(t32.matrix(), t33.matrix()); + } + +} + +template +void transform_associativity_left(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) +{ + VERIFY_IS_APPROX( q*(a1*v), (q*a1)*v ); + VERIFY_IS_APPROX( q*(a2*v), (q*a2)*v ); + VERIFY_IS_APPROX( q*(p*h).hnormalized(), ((q*p)*h).hnormalized() ); +} + +template +void 
transform_associativity2(const A1& a1, const A2& a2, const P& p, const Q& q, const V& v, const H& h) +{ + VERIFY_IS_APPROX( a1*(q*v), (a1*q)*v ); + VERIFY_IS_APPROX( a2*(q*v), (a2*q)*v ); + VERIFY_IS_APPROX( p *(q*v).homogeneous(), (p *q)*v.homogeneous() ); + + transform_associativity_left(a1, a2,p, q, v, h); +} + +template +void transform_associativity(const RotationType& R) +{ + typedef Matrix VectorType; + typedef Matrix HVectorType; + typedef Matrix LinearType; + typedef Matrix MatrixType; + typedef Transform AffineCompactType; + typedef Transform AffineType; + typedef Transform ProjectiveType; + typedef DiagonalMatrix ScalingType; + typedef Translation TranslationType; + + AffineCompactType A1c; A1c.matrix().setRandom(); + AffineCompactType A2c; A2c.matrix().setRandom(); + AffineType A1(A1c); + AffineType A2(A2c); + ProjectiveType P1; P1.matrix().setRandom(); + VectorType v1 = VectorType::Random(); + VectorType v2 = VectorType::Random(); + HVectorType h1 = HVectorType::Random(); + Scalar s1 = internal::random(); + LinearType L = LinearType::Random(); + MatrixType M = MatrixType::Random(); + + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, A2c, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, v1.asDiagonal(), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, ScalingType(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, Scaling(s1), v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, TranslationType(v1), v2, h1) ); + CALL_SUBTEST( transform_associativity_left(A1c, A1, P1, L, v2, h1) ); + CALL_SUBTEST( transform_associativity2(A1c, A1, P1, R, v2, h1) ); + + VERIFY_IS_APPROX( A1*(M*h1), (A1*M)*h1 ); + VERIFY_IS_APPROX( A1c*(M*h1), (A1c*M)*h1 ); + VERIFY_IS_APPROX( P1*(M*h1), (P1*M)*h1 ); + + VERIFY_IS_APPROX( M*(A1*h1), (M*A1)*h1 ); + VERIFY_IS_APPROX( 
M*(A1c*h1), (M*A1c)*h1 ); + VERIFY_IS_APPROX( M*(P1*h1), ((M*P1)*h1) ); +} + +template void transform_alignment() +{ + typedef Transform Projective3a; + typedef Transform Projective3u; + + EIGEN_ALIGN_MAX Scalar array1[16]; + EIGEN_ALIGN_MAX Scalar array2[16]; + EIGEN_ALIGN_MAX Scalar array3[16+1]; + Scalar* array3u = array3+1; + + Projective3a *p1 = ::new(reinterpret_cast(array1)) Projective3a; + Projective3u *p2 = ::new(reinterpret_cast(array2)) Projective3u; + Projective3u *p3 = ::new(reinterpret_cast(array3u)) Projective3u; + + p1->matrix().setRandom(); + *p2 = *p1; + *p3 = *p1; + + VERIFY_IS_APPROX(p1->matrix(), p2->matrix()); + VERIFY_IS_APPROX(p1->matrix(), p3->matrix()); + + VERIFY_IS_APPROX( (*p1) * (*p1), (*p2)*(*p3)); + + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(internal::packet_traits::Vectorizable) + VERIFY_RAISES_ASSERT((::new(reinterpret_cast(array3u)) Projective3a)); + #endif +} + +template void transform_products() +{ + typedef Matrix Mat; + typedef Transform Proj; + typedef Transform Aff; + typedef Transform AffC; + + Proj p; p.matrix().setRandom(); + Aff a; a.linear().setRandom(); a.translation().setRandom(); + AffC ac = a; + + Mat p_m(p.matrix()), a_m(a.matrix()); + + VERIFY_IS_APPROX((p*p).matrix(), p_m*p_m); + VERIFY_IS_APPROX((a*a).matrix(), a_m*a_m); + VERIFY_IS_APPROX((p*a).matrix(), p_m*a_m); + VERIFY_IS_APPROX((a*p).matrix(), a_m*p_m); + VERIFY_IS_APPROX((ac*a).matrix(), a_m*a_m); + VERIFY_IS_APPROX((a*ac).matrix(), a_m*a_m); + VERIFY_IS_APPROX((p*ac).matrix(), p_m*a_m); + VERIFY_IS_APPROX((ac*p).matrix(), a_m*p_m); +} + +void test_geo_transformations() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( transformations() )); + CALL_SUBTEST_1(( non_projective_only() )); + + CALL_SUBTEST_2(( transformations() )); + CALL_SUBTEST_2(( non_projective_only() )); + CALL_SUBTEST_2(( transform_alignment() )); + + CALL_SUBTEST_3(( transformations() )); + CALL_SUBTEST_3(( transformations() )); + CALL_SUBTEST_3(( 
transform_alignment() )); + + CALL_SUBTEST_4(( transformations() )); + CALL_SUBTEST_4(( non_projective_only() )); + + CALL_SUBTEST_5(( transformations() )); + CALL_SUBTEST_5(( non_projective_only() )); + + CALL_SUBTEST_6(( transformations() )); + CALL_SUBTEST_6(( transformations() )); + + + CALL_SUBTEST_7(( transform_products() )); + CALL_SUBTEST_7(( transform_products() )); + + CALL_SUBTEST_8(( transform_associativity(Rotation2D(internal::random()*double(EIGEN_PI))) )); + CALL_SUBTEST_8(( transform_associativity(Quaterniond::UnitRandom()) )); + } +} diff --git a/thirdparty/eigen/test/half_float.cpp b/thirdparty/eigen/test/half_float.cpp new file mode 100644 index 000000000..f8d438e2f --- /dev/null +++ b/thirdparty/eigen/test/half_float.cpp @@ -0,0 +1,252 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include "main.h" + +#include + +// Make sure it's possible to forward declare Eigen::half +namespace Eigen { +struct half; +} + +using Eigen::half; + +void test_conversion() +{ + using Eigen::half_impl::__half; + + // Conversion from float. + VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); + VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555); + VERIFY_IS_EQUAL(half(0.0f).x, 0x0000); + VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000); + VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff); + VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity. + + // Denormals. + VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001); + VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001); + VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); + + // Verify round-to-nearest-even behavior. 
+ float val1 = float(half(__half(0x3c00))); + float val2 = float(half(__half(0x3c01))); + float val3 = float(half(__half(0x3c02))); + VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02); + + // Conversion from int. + VERIFY_IS_EQUAL(half(-1).x, 0xbc00); + VERIFY_IS_EQUAL(half(0).x, 0x0000); + VERIFY_IS_EQUAL(half(1).x, 0x3c00); + VERIFY_IS_EQUAL(half(2).x, 0x4000); + VERIFY_IS_EQUAL(half(3).x, 0x4200); + + // Conversion from bool. + VERIFY_IS_EQUAL(half(false).x, 0x0000); + VERIFY_IS_EQUAL(half(true).x, 0x3c00); + + // Conversion to float. + VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f); + VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f); + + // Denormals. + VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f); + + // NaNs and infinities. + VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. + VERIFY(!(numext::isnan)(float(half(0.0f)))); + VERIFY((numext::isinf)(float(half(__half(0xfc00))))); + VERIFY((numext::isnan)(float(half(__half(0xfc01))))); + VERIFY((numext::isinf)(float(half(__half(0x7c00))))); + VERIFY((numext::isnan)(float(half(__half(0x7c01))))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY((numext::isnan)(float(half(0.0 / 0.0)))); + VERIFY((numext::isinf)(float(half(1.0 / 0.0)))); + VERIFY((numext::isinf)(float(half(-1.0 / 0.0)))); +#endif + + // Exactly same checks as above, just directly on the half representation. 
+ VERIFY(!(numext::isinf)(half(__half(0x7bff)))); + VERIFY(!(numext::isnan)(half(__half(0x0000)))); + VERIFY((numext::isinf)(half(__half(0xfc00)))); + VERIFY((numext::isnan)(half(__half(0xfc01)))); + VERIFY((numext::isinf)(half(__half(0x7c00)))); + VERIFY((numext::isnan)(half(__half(0x7c01)))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY((numext::isnan)(half(0.0 / 0.0))); + VERIFY((numext::isinf)(half(1.0 / 0.0))); + VERIFY((numext::isinf)(half(-1.0 / 0.0))); +#endif +} + +void test_numtraits() +{ + std::cout << "epsilon = " << NumTraits::epsilon() << std::endl; + std::cout << "highest = " << NumTraits::highest() << std::endl; + std::cout << "lowest = " << NumTraits::lowest() << std::endl; + std::cout << "inifinty = " << NumTraits::infinity() << std::endl; + std::cout << "nan = " << NumTraits::quiet_NaN() << std::endl; + +} + +void test_arithmetic() +{ + VERIFY_IS_EQUAL(float(half(2) + half(2)), 4); + VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0); + VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f); + VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f); + VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f); + VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f); + VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f); +} + +void test_comparison() +{ + VERIFY(half(1.0f) > half(0.5f)); + VERIFY(half(0.5f) < half(1.0f)); + VERIFY(!(half(1.0f) < half(0.5f))); + VERIFY(!(half(0.5f) > half(1.0f))); + + VERIFY(!(half(4.0f) > half(4.0f))); + VERIFY(!(half(4.0f) < half(4.0f))); + + VERIFY(!(half(0.0f) < half(-0.0f))); + VERIFY(!(half(-0.0f) < half(0.0f))); + VERIFY(!(half(0.0f) > half(-0.0f))); + VERIFY(!(half(-0.0f) > half(0.0f))); + + VERIFY(half(0.2f) > half(-1.0f)); + VERIFY(half(-1.0f) < half(0.2f)); + VERIFY(half(-16.0f) < half(-15.0f)); + + VERIFY(half(1.0f) == half(1.0f)); + VERIFY(half(1.0f) != half(2.0f)); + + // Comparisons with NaNs and infinities. 
+#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0))); + VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0)); + + VERIFY(!(half(1.0) == half(0.0 / 0.0))); + VERIFY(!(half(1.0) < half(0.0 / 0.0))); + VERIFY(!(half(1.0) > half(0.0 / 0.0))); + VERIFY(half(1.0) != half(0.0 / 0.0)); + + VERIFY(half(1.0) < half(1.0 / 0.0)); + VERIFY(half(1.0) > half(-1.0 / 0.0)); +#endif +} + +void test_basic_functions() +{ + VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(abs(half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(abs(half(-3.5f))), 3.5f); + + VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f); + VERIFY_IS_EQUAL(float(floor(half(-3.5f))), -4.0f); + + VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f); + VERIFY_IS_EQUAL(float(ceil(half(-3.5f))), -3.0f); + + VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f); + VERIFY_IS_APPROX(float(sqrt(half(4.0f))), 2.0f); + + VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f); + VERIFY_IS_APPROX(float(pow(half(2.0f), half(2.0f))), 4.0f); + + VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); + VERIFY_IS_EQUAL(float(exp(half(0.0f))), 1.0f); + VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + VERIFY_IS_APPROX(float(exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + + VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log(half(1.0f))), 0.0f); + 
VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); + VERIFY_IS_APPROX(float(log(half(10.0f))), 2.30273f); + + VERIFY_IS_EQUAL(float(numext::log1p(half(0.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log1p(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log1p(half(10.0f))), 2.3978953f); + VERIFY_IS_APPROX(float(log1p(half(10.0f))), 2.3978953f); +} + +void test_trigonometric_functions() +{ + VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); + //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); + + VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); + VERIFY_IS_APPROX(sin(half(0.0f)), half(sinf(0.0f))); + // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); + VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); + + VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); + VERIFY_IS_APPROX(tan(half(0.0f)), half(tanf(0.0f))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); +} + +void test_array() +{ + typedef Array ArrayXh; + Index size = internal::random(1,10); + Index i = internal::random(0,size-1); + ArrayXh a1 = ArrayXh::Random(size), a2 = ArrayXh::Random(size); + VERIFY_IS_APPROX( a1+a1, half(2)*a1 ); + VERIFY( (a1.abs() >= half(0)).all() ); + VERIFY_IS_APPROX( (a1*a1).sqrt(), a1.abs() ); + + VERIFY( ((a1.min)(a2) <= 
(a1.max)(a2)).all() ); + a1(i) = half(-10.); + VERIFY_IS_EQUAL( a1.minCoeff(), half(-10.) ); + a1(i) = half(10.); + VERIFY_IS_EQUAL( a1.maxCoeff(), half(10.) ); + + std::stringstream ss; + ss << a1; +} + +void test_half_float() +{ + CALL_SUBTEST(test_conversion()); + CALL_SUBTEST(test_numtraits()); + CALL_SUBTEST(test_arithmetic()); + CALL_SUBTEST(test_comparison()); + CALL_SUBTEST(test_basic_functions()); + CALL_SUBTEST(test_trigonometric_functions()); + CALL_SUBTEST(test_array()); +} diff --git a/thirdparty/eigen/test/hessenberg.cpp b/thirdparty/eigen/test/hessenberg.cpp new file mode 100644 index 000000000..96bc19e2e --- /dev/null +++ b/thirdparty/eigen/test/hessenberg.cpp @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2010 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +template void hessenberg(int size = Size) +{ + typedef Matrix MatrixType; + + // Test basic functionality: A = U H U* and H is Hessenberg + for(int counter = 0; counter < g_repeat; ++counter) { + MatrixType m = MatrixType::Random(size,size); + HessenbergDecomposition hess(m); + MatrixType Q = hess.matrixQ(); + MatrixType H = hess.matrixH(); + VERIFY_IS_APPROX(m, Q * H * Q.adjoint()); + for(int row = 2; row < size; ++row) { + for(int col = 0; col < row-1; ++col) { + VERIFY(H(row,col) == (typename MatrixType::Scalar)0); + } + } + } + + // Test whether compute() and constructor returns same result + MatrixType A = MatrixType::Random(size, size); + HessenbergDecomposition cs1; + cs1.compute(A); + HessenbergDecomposition cs2(A); + VERIFY_IS_EQUAL(cs1.matrixH().eval(), cs2.matrixH().eval()); + MatrixType cs1Q = cs1.matrixQ(); + MatrixType cs2Q = cs2.matrixQ(); + VERIFY_IS_EQUAL(cs1Q, cs2Q); + + // Test assertions for when used uninitialized + HessenbergDecomposition hessUninitialized; + VERIFY_RAISES_ASSERT( hessUninitialized.matrixH() ); + VERIFY_RAISES_ASSERT( hessUninitialized.matrixQ() ); + VERIFY_RAISES_ASSERT( hessUninitialized.householderCoefficients() ); + VERIFY_RAISES_ASSERT( hessUninitialized.packedMatrix() ); + + // TODO: Add tests for packedMatrix() and householderCoefficients() +} + +void test_hessenberg() +{ + CALL_SUBTEST_1(( hessenberg,1>() )); + CALL_SUBTEST_2(( hessenberg,2>() )); + CALL_SUBTEST_3(( hessenberg,4>() )); + CALL_SUBTEST_4(( hessenberg(internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + CALL_SUBTEST_5(( hessenberg,Dynamic>(internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + + // Test problem size constructors + CALL_SUBTEST_6(HessenbergDecomposition(10)); +} diff --git a/thirdparty/eigen/test/householder.cpp b/thirdparty/eigen/test/householder.cpp new file mode 100644 index 000000000..c5f6b5e4f --- /dev/null +++ b/thirdparty/eigen/test/householder.cpp @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight 
C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include + +template void householder(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + static bool even = true; + even = !even; + /* this test covers the following files: + Householder.h + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + typedef Matrix::ret, 1> EssentialVectorType; + typedef Matrix SquareMatrixType; + typedef Matrix HBlockMatrixType; + typedef Matrix HCoeffsVectorType; + + typedef Matrix TMatrixType; + + Matrix _tmp((std::max)(rows,cols)); + Scalar* tmp = &_tmp.coeffRef(0,0); + + Scalar beta; + RealScalar alpha; + EssentialVectorType essential; + + VectorType v1 = VectorType::Random(rows), v2; + v2 = v1; + v1.makeHouseholder(essential, beta, alpha); + v1.applyHouseholderOnTheLeft(essential,beta,tmp); + VERIFY_IS_APPROX(v1.norm(), v2.norm()); + if(rows>=2) VERIFY_IS_MUCH_SMALLER_THAN(v1.tail(rows-1).norm(), v1.norm()); + v1 = VectorType::Random(rows); + v2 = v1; + v1.applyHouseholderOnTheLeft(essential,beta,tmp); + VERIFY_IS_APPROX(v1.norm(), v2.norm()); + + MatrixType m1(rows, cols), + m2(rows, cols); + + v1 = VectorType::Random(rows); + if(even) v1.tail(rows-1).setZero(); + m1.colwise() = v1; + m2 = m1; + m1.col(0).makeHouseholder(essential, beta, alpha); + m1.applyHouseholderOnTheLeft(essential,beta,tmp); + VERIFY_IS_APPROX(m1.norm(), m2.norm()); + if(rows>=2) VERIFY_IS_MUCH_SMALLER_THAN(m1.block(1,0,rows-1,cols).norm(), m1.norm()); + VERIFY_IS_MUCH_SMALLER_THAN(numext::imag(m1(0,0)), numext::real(m1(0,0))); + VERIFY_IS_APPROX(numext::real(m1(0,0)), alpha); + + v1 = 
VectorType::Random(rows); + if(even) v1.tail(rows-1).setZero(); + SquareMatrixType m3(rows,rows), m4(rows,rows); + m3.rowwise() = v1.transpose(); + m4 = m3; + m3.row(0).makeHouseholder(essential, beta, alpha); + m3.applyHouseholderOnTheRight(essential,beta,tmp); + VERIFY_IS_APPROX(m3.norm(), m4.norm()); + if(rows>=2) VERIFY_IS_MUCH_SMALLER_THAN(m3.block(0,1,rows,rows-1).norm(), m3.norm()); + VERIFY_IS_MUCH_SMALLER_THAN(numext::imag(m3(0,0)), numext::real(m3(0,0))); + VERIFY_IS_APPROX(numext::real(m3(0,0)), alpha); + + // test householder sequence on the left with a shift + + Index shift = internal::random(0, std::max(rows-2,0)); + Index brows = rows - shift; + m1.setRandom(rows, cols); + HBlockMatrixType hbm = m1.block(shift,0,brows,cols); + HouseholderQR qr(hbm); + m2 = m1; + m2.block(shift,0,brows,cols) = qr.matrixQR(); + HCoeffsVectorType hc = qr.hCoeffs().conjugate(); + HouseholderSequence hseq(m2, hc); + hseq.setLength(hc.size()).setShift(shift); + VERIFY(hseq.length() == hc.size()); + VERIFY(hseq.shift() == shift); + + MatrixType m5 = m2; + m5.block(shift,0,brows,cols).template triangularView().setZero(); + VERIFY_IS_APPROX(hseq * m5, m1); // test applying hseq directly + m3 = hseq; + VERIFY_IS_APPROX(m3 * m5, m1); // test evaluating hseq to a dense matrix, then applying + + SquareMatrixType hseq_mat = hseq; + SquareMatrixType hseq_mat_conj = hseq.conjugate(); + SquareMatrixType hseq_mat_adj = hseq.adjoint(); + SquareMatrixType hseq_mat_trans = hseq.transpose(); + SquareMatrixType m6 = SquareMatrixType::Random(rows, rows); + VERIFY_IS_APPROX(hseq_mat.adjoint(), hseq_mat_adj); + VERIFY_IS_APPROX(hseq_mat.conjugate(), hseq_mat_conj); + VERIFY_IS_APPROX(hseq_mat.transpose(), hseq_mat_trans); + VERIFY_IS_APPROX(hseq_mat * m6, hseq_mat * m6); + VERIFY_IS_APPROX(hseq_mat.adjoint() * m6, hseq_mat_adj * m6); + VERIFY_IS_APPROX(hseq_mat.conjugate() * m6, hseq_mat_conj * m6); + VERIFY_IS_APPROX(hseq_mat.transpose() * m6, hseq_mat_trans * m6); + VERIFY_IS_APPROX(m6 * 
hseq_mat, m6 * hseq_mat); + VERIFY_IS_APPROX(m6 * hseq_mat.adjoint(), m6 * hseq_mat_adj); + VERIFY_IS_APPROX(m6 * hseq_mat.conjugate(), m6 * hseq_mat_conj); + VERIFY_IS_APPROX(m6 * hseq_mat.transpose(), m6 * hseq_mat_trans); + + // test householder sequence on the right with a shift + + TMatrixType tm2 = m2.transpose(); + HouseholderSequence rhseq(tm2, hc); + rhseq.setLength(hc.size()).setShift(shift); + VERIFY_IS_APPROX(rhseq * m5, m1); // test applying rhseq directly + m3 = rhseq; + VERIFY_IS_APPROX(m3 * m5, m1); // test evaluating rhseq to a dense matrix, then applying +} + +void test_householder() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( householder(Matrix()) ); + CALL_SUBTEST_2( householder(Matrix()) ); + CALL_SUBTEST_3( householder(Matrix()) ); + CALL_SUBTEST_4( householder(Matrix()) ); + CALL_SUBTEST_5( householder(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( householder(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( householder(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( householder(Matrix()) ); + } +} diff --git a/thirdparty/eigen/test/incomplete_cholesky.cpp b/thirdparty/eigen/test/incomplete_cholesky.cpp new file mode 100644 index 000000000..59ffe9259 --- /dev/null +++ b/thirdparty/eigen/test/incomplete_cholesky.cpp @@ -0,0 +1,65 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+// #define EIGEN_DONT_VECTORIZE +// #define EIGEN_MAX_ALIGN_BYTES 0 +#include "sparse_solver.h" +#include +#include + +template void test_incomplete_cholesky_T() +{ + typedef SparseMatrix SparseMatrixType; + ConjugateGradient > > cg_illt_lower_amd; + ConjugateGradient > > cg_illt_lower_nat; + ConjugateGradient > > cg_illt_upper_amd; + ConjugateGradient > > cg_illt_upper_nat; + ConjugateGradient > > cg_illt_uplo_amd; + + + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_amd) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_nat) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_amd) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_nat) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_uplo_amd) ); +} + +void test_incomplete_cholesky() +{ + CALL_SUBTEST_1(( test_incomplete_cholesky_T() )); + CALL_SUBTEST_2(( test_incomplete_cholesky_T, int>() )); + CALL_SUBTEST_3(( test_incomplete_cholesky_T() )); + +#ifdef EIGEN_TEST_PART_1 + // regression for bug 1150 + for(int N = 1; N<20; ++N) + { + Eigen::MatrixXd b( N, N ); + b.setOnes(); + + Eigen::SparseMatrix m( N, N ); + m.reserve(Eigen::VectorXi::Constant(N,4)); + for( int i = 0; i < N; ++i ) + { + m.insert( i, i ) = 1; + m.coeffRef( i, i / 2 ) = 2; + m.coeffRef( i, i / 3 ) = 2; + m.coeffRef( i, i / 4 ) = 2; + } + + Eigen::SparseMatrix A; + A = m * m.transpose(); + + Eigen::ConjugateGradient, + Eigen::Lower | Eigen::Upper, + Eigen::IncompleteCholesky > solver( A ); + VERIFY(solver.preconditioner().info() == Eigen::Success); + VERIFY(solver.info() == Eigen::Success); + } +#endif +} diff --git a/thirdparty/eigen/test/inplace_decomposition.cpp b/thirdparty/eigen/test/inplace_decomposition.cpp new file mode 100644 index 000000000..92d0d91b6 --- /dev/null +++ b/thirdparty/eigen/test/inplace_decomposition.cpp @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +// This file test inplace decomposition through Ref<>, as supported by Cholesky, LU, and QR decompositions. + +template void inplace(bool square = false, bool SPD = false) +{ + typedef typename MatrixType::Scalar Scalar; + typedef Matrix RhsType; + typedef Matrix ResType; + + Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random(2,EIGEN_TEST_MAX_SIZE/2) : Index(MatrixType::RowsAtCompileTime); + Index cols = MatrixType::ColsAtCompileTime==Dynamic ? (square?rows:internal::random(2,rows)) : Index(MatrixType::ColsAtCompileTime); + + MatrixType A = MatrixType::Random(rows,cols); + RhsType b = RhsType::Random(rows); + ResType x(cols); + + if(SPD) + { + assert(square); + A.topRows(cols) = A.topRows(cols).adjoint() * A.topRows(cols); + A.diagonal().array() += 1e-3; + } + + MatrixType A0 = A; + MatrixType A1 = A; + + DecType dec(A); + + // Check that the content of A has been modified + VERIFY_IS_NOT_APPROX( A, A0 ); + + // Check that the decomposition is correct: + if(rows==cols) + { + VERIFY_IS_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } + + // Check that modifying A breaks the current dec: + A.setRandom(); + if(rows==cols) + { + VERIFY_IS_NOT_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_NOT_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } + + // Check that calling compute(A1) does not modify A1: + A = A0; + dec.compute(A1); + VERIFY_IS_EQUAL(A0,A1); + VERIFY_IS_NOT_APPROX( A, A0 ); + if(rows==cols) + { + VERIFY_IS_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_APPROX( A0.transpose() * A0 * (x = 
dec.solve(b)), A0.transpose() * b ); + } +} + + +void test_inplace_decomposition() +{ + EIGEN_UNUSED typedef Matrix Matrix43d; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( inplace >, MatrixXd>(true,true) )); + CALL_SUBTEST_1(( inplace >, Matrix4d>(true,true) )); + + CALL_SUBTEST_2(( inplace >, MatrixXd>(true,true) )); + CALL_SUBTEST_2(( inplace >, Matrix4d>(true,true) )); + + CALL_SUBTEST_3(( inplace >, MatrixXd>(true,false) )); + CALL_SUBTEST_3(( inplace >, Matrix4d>(true,false) )); + + CALL_SUBTEST_4(( inplace >, MatrixXd>(true,false) )); + CALL_SUBTEST_4(( inplace >, Matrix4d>(true,false) )); + + CALL_SUBTEST_5(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_5(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_6(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_6(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_7(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_7(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_8(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_8(( inplace >, Matrix43d>(false,false) )); + } +} diff --git a/thirdparty/eigen/test/integer_types.cpp b/thirdparty/eigen/test/integer_types.cpp new file mode 100644 index 000000000..a21f73a81 --- /dev/null +++ b/thirdparty/eigen/test/integer_types.cpp @@ -0,0 +1,169 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_STATIC_ASSERT + +#include "main.h" + +#undef VERIFY_IS_APPROX +#define VERIFY_IS_APPROX(a, b) VERIFY((a)==(b)); +#undef VERIFY_IS_NOT_APPROX +#define VERIFY_IS_NOT_APPROX(a, b) VERIFY((a)!=(b)); + +template void signed_integer_type_tests(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + enum { is_signed = (Scalar(-1) > Scalar(0)) ? 0 : 1 }; + VERIFY(is_signed == 1); + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1(rows, cols), + m2 = MatrixType::Random(rows, cols), + mzero = MatrixType::Zero(rows, cols); + + do { + m1 = MatrixType::Random(rows, cols); + } while(m1 == mzero || m1 == m2); + + // check linear structure + + Scalar s1; + do { + s1 = internal::random(); + } while(s1 == 0); + + VERIFY_IS_EQUAL(-(-m1), m1); + VERIFY_IS_EQUAL(-m2+m1+m2, m1); + VERIFY_IS_EQUAL((-m1+m2)*s1, -s1*m1+s1*m2); +} + +template void integer_type_tests(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + VERIFY(NumTraits::IsInteger); + enum { is_signed = (Scalar(-1) > Scalar(0)) ? 
0 : 1 }; + VERIFY(int(NumTraits::IsSigned) == is_signed); + + typedef Matrix VectorType; + + Index rows = m.rows(); + Index cols = m.cols(); + + // this test relies a lot on Random.h, and there's not much more that we can do + // to test it, hence I consider that we will have tested Random.h + MatrixType m1(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + mzero = MatrixType::Zero(rows, cols); + + typedef Matrix SquareMatrixType; + SquareMatrixType identity = SquareMatrixType::Identity(rows, rows), + square = SquareMatrixType::Random(rows, rows); + VectorType v1(rows), + v2 = VectorType::Random(rows), + vzero = VectorType::Zero(rows); + + do { + m1 = MatrixType::Random(rows, cols); + } while(m1 == mzero || m1 == m2); + + do { + v1 = VectorType::Random(rows); + } while(v1 == vzero || v1 == v2); + + VERIFY_IS_APPROX( v1, v1); + VERIFY_IS_NOT_APPROX( v1, 2*v1); + VERIFY_IS_APPROX( vzero, v1-v1); + VERIFY_IS_APPROX( m1, m1); + VERIFY_IS_NOT_APPROX( m1, 2*m1); + VERIFY_IS_APPROX( mzero, m1-m1); + + VERIFY_IS_APPROX(m3 = m1,m1); + MatrixType m4; + VERIFY_IS_APPROX(m4 = m1,m1); + + m3.real() = m1.real(); + VERIFY_IS_APPROX(static_cast(m3).real(), static_cast(m1).real()); + VERIFY_IS_APPROX(static_cast(m3).real(), m1.real()); + + // check == / != operators + VERIFY(m1==m1); + VERIFY(m1!=m2); + VERIFY(!(m1==m2)); + VERIFY(!(m1!=m1)); + m1 = m2; + VERIFY(m1==m2); + VERIFY(!(m1!=m2)); + + // check linear structure + + Scalar s1; + do { + s1 = internal::random(); + } while(s1 == 0); + + VERIFY_IS_EQUAL(m1+m1, 2*m1); + VERIFY_IS_EQUAL(m1+m2-m1, m2); + VERIFY_IS_EQUAL(m1*s1, s1*m1); + VERIFY_IS_EQUAL((m1+m2)*s1, s1*m1+s1*m2); + m3 = m2; m3 += m1; + VERIFY_IS_EQUAL(m3, m1+m2); + m3 = m2; m3 -= m1; + VERIFY_IS_EQUAL(m3, m2-m1); + m3 = m2; m3 *= s1; + VERIFY_IS_EQUAL(m3, s1*m2); + + // check matrix product. 
+ + VERIFY_IS_APPROX(identity * m1, m1); + VERIFY_IS_APPROX(square * (m1 + m2), square * m1 + square * m2); + VERIFY_IS_APPROX((m1 + m2).transpose() * square, m1.transpose() * square + m2.transpose() * square); + VERIFY_IS_APPROX((m1 * m2.transpose()) * m1, m1 * (m2.transpose() * m1)); +} + +void test_integer_types() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( integer_type_tests(Matrix()) ); + CALL_SUBTEST_1( integer_type_tests(Matrix()) ); + + CALL_SUBTEST_2( integer_type_tests(Matrix()) ); + CALL_SUBTEST_2( signed_integer_type_tests(Matrix()) ); + + CALL_SUBTEST_3( integer_type_tests(Matrix(2, 10)) ); + CALL_SUBTEST_3( signed_integer_type_tests(Matrix(2, 10)) ); + + CALL_SUBTEST_4( integer_type_tests(Matrix()) ); + CALL_SUBTEST_4( integer_type_tests(Matrix(20, 20)) ); + + CALL_SUBTEST_5( integer_type_tests(Matrix(7, 4)) ); + CALL_SUBTEST_5( signed_integer_type_tests(Matrix(7, 4)) ); + + CALL_SUBTEST_6( integer_type_tests(Matrix()) ); + + CALL_SUBTEST_7( integer_type_tests(Matrix()) ); + CALL_SUBTEST_7( signed_integer_type_tests(Matrix()) ); + + CALL_SUBTEST_8( integer_type_tests(Matrix(1, 5)) ); + } +#ifdef EIGEN_TEST_PART_9 + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + if(sizeof(long)>sizeof(int)) { + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + } +#endif +} diff --git a/thirdparty/eigen/test/inverse.cpp b/thirdparty/eigen/test/inverse.cpp new file mode 100644 index 000000000..5c6777a18 --- /dev/null +++ b/thirdparty/eigen/test/inverse.cpp @@ -0,0 +1,117 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include + +template void inverse(const MatrixType& m) +{ + using std::abs; + typedef typename MatrixType::Index Index; + /* this test covers the following files: + Inverse.h + */ + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + + MatrixType m1(rows, cols), + m2(rows, cols), + identity = MatrixType::Identity(rows, rows); + createRandomPIMatrixOfRank(rows,rows,rows,m1); + m2 = m1.inverse(); + VERIFY_IS_APPROX(m1, m2.inverse() ); + + VERIFY_IS_APPROX((Scalar(2)*m2).inverse(), m2.inverse()*Scalar(0.5)); + + VERIFY_IS_APPROX(identity, m1.inverse() * m1 ); + VERIFY_IS_APPROX(identity, m1 * m1.inverse() ); + + VERIFY_IS_APPROX(m1, m1.inverse().inverse() ); + + // since for the general case we implement separately row-major and col-major, test that + VERIFY_IS_APPROX(MatrixType(m1.transpose().inverse()), MatrixType(m1.inverse().transpose())); + +#if !defined(EIGEN_TEST_PART_5) && !defined(EIGEN_TEST_PART_6) + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + + //computeInverseAndDetWithCheck tests + //First: an invertible matrix + bool invertible; + RealScalar det; + + m2.setZero(); + m1.computeInverseAndDetWithCheck(m2, det, invertible); + VERIFY(invertible); + VERIFY_IS_APPROX(identity, m1*m2); + VERIFY_IS_APPROX(det, m1.determinant()); + + m2.setZero(); + m1.computeInverseWithCheck(m2, invertible); + VERIFY(invertible); + VERIFY_IS_APPROX(identity, m1*m2); + + //Second: a rank one matrix (not invertible, except for 1x1 matrices) + VectorType v3 = VectorType::Random(rows); + MatrixType m3 = v3*v3.transpose(), m4(rows,cols); + m3.computeInverseAndDetWithCheck(m4, det, invertible); + VERIFY( rows==1 ? invertible : !invertible ); + VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1)); + m3.computeInverseWithCheck(m4, invertible); + VERIFY( rows==1 ? 
invertible : !invertible ); + + // check with submatrices + { + Matrix m5; + m5.setRandom(); + m5.topLeftCorner(rows,rows) = m1; + m2 = m5.template topLeftCorner().inverse(); + VERIFY_IS_APPROX( (m5.template topLeftCorner()), m2.inverse() ); + } +#endif + + // check in-place inversion + if(MatrixType::RowsAtCompileTime>=2 && MatrixType::RowsAtCompileTime<=4) + { + // in-place is forbidden + VERIFY_RAISES_ASSERT(m1 = m1.inverse()); + } + else + { + m2 = m1.inverse(); + m1 = m1.inverse(); + VERIFY_IS_APPROX(m1,m2); + } +} + +void test_inverse() +{ + int s = 0; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( inverse(Matrix()) ); + CALL_SUBTEST_2( inverse(Matrix2d()) ); + CALL_SUBTEST_3( inverse(Matrix3f()) ); + CALL_SUBTEST_4( inverse(Matrix4f()) ); + CALL_SUBTEST_4( inverse(Matrix()) ); + + s = internal::random(50,320); + CALL_SUBTEST_5( inverse(MatrixXf(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(25,100); + CALL_SUBTEST_6( inverse(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + CALL_SUBTEST_7( inverse(Matrix4d()) ); + CALL_SUBTEST_7( inverse(Matrix()) ); + } +} diff --git a/thirdparty/eigen/test/is_same_dense.cpp b/thirdparty/eigen/test/is_same_dense.cpp new file mode 100644 index 000000000..2c7838ce9 --- /dev/null +++ b/thirdparty/eigen/test/is_same_dense.cpp @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +using internal::is_same_dense; + +void test_is_same_dense() +{ + typedef Matrix ColMatrixXd; + ColMatrixXd m1(10,10); + Ref ref_m1(m1); + Ref const_ref_m1(m1); + VERIFY(is_same_dense(m1,m1)); + VERIFY(is_same_dense(m1,ref_m1)); + VERIFY(is_same_dense(const_ref_m1,m1)); + VERIFY(is_same_dense(const_ref_m1,ref_m1)); + + VERIFY(is_same_dense(m1.block(0,0,m1.rows(),m1.cols()),m1)); + VERIFY(!is_same_dense(m1.row(0),m1.col(0))); + + Ref const_ref_m1_row(m1.row(1)); + VERIFY(!is_same_dense(m1.row(1),const_ref_m1_row)); + + Ref const_ref_m1_col(m1.col(1)); + VERIFY(is_same_dense(m1.col(1),const_ref_m1_col)); +} diff --git a/thirdparty/eigen/test/jacobi.cpp b/thirdparty/eigen/test/jacobi.cpp new file mode 100644 index 000000000..7ccd4124b --- /dev/null +++ b/thirdparty/eigen/test/jacobi.cpp @@ -0,0 +1,81 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +template +void jacobi(const MatrixType& m = MatrixType()) +{ + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix JacobiVector; + + const MatrixType a(MatrixType::Random(rows, cols)); + + JacobiVector v = JacobiVector::Random().normalized(); + JacobiScalar c = v.x(), s = v.y(); + JacobiRotation rot(c, s); + + { + Index p = internal::random(0, rows-1); + Index q; + do { + q = internal::random(0, rows-1); + } while (q == p); + + MatrixType b = a; + b.applyOnTheLeft(p, q, rot); + VERIFY_IS_APPROX(b.row(p), c * a.row(p) + numext::conj(s) * a.row(q)); + VERIFY_IS_APPROX(b.row(q), -s * a.row(p) + numext::conj(c) * a.row(q)); + } + + { + Index p = internal::random(0, cols-1); + Index q; + do { + q = internal::random(0, cols-1); + } while (q == p); + + MatrixType b = a; + b.applyOnTheRight(p, q, rot); + VERIFY_IS_APPROX(b.col(p), c * a.col(p) - s * a.col(q)); + VERIFY_IS_APPROX(b.col(q), numext::conj(s) * a.col(p) + numext::conj(c) * a.col(q)); + } +} + +void test_jacobi() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( jacobi() )); + CALL_SUBTEST_2(( jacobi() )); + CALL_SUBTEST_3(( jacobi() )); + CALL_SUBTEST_3(( jacobi >() )); + + int r = internal::random(2, internal::random(1,EIGEN_TEST_MAX_SIZE)/2), + c = internal::random(2, internal::random(1,EIGEN_TEST_MAX_SIZE)/2); + CALL_SUBTEST_4(( jacobi(MatrixXf(r,c)) )); + CALL_SUBTEST_5(( jacobi(MatrixXcd(r,c)) )); + CALL_SUBTEST_5(( jacobi >(MatrixXcd(r,c)) )); + // complex is really important to test as it is the only way to cover conjugation issues in certain unaligned paths + CALL_SUBTEST_6(( jacobi(MatrixXcf(r,c)) )); + CALL_SUBTEST_6(( jacobi >(MatrixXcf(r,c)) )); + + TEST_SET_BUT_UNUSED_VARIABLE(r); + TEST_SET_BUT_UNUSED_VARIABLE(c); + } +} diff --git a/thirdparty/eigen/test/jacobisvd.cpp 
b/thirdparty/eigen/test/jacobisvd.cpp new file mode 100644 index 000000000..3d8d0203d --- /dev/null +++ b/thirdparty/eigen/test/jacobisvd.cpp @@ -0,0 +1,120 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// discard stack allocation as that too bypasses malloc +#define EIGEN_STACK_ALLOCATION_LIMIT 0 +#define EIGEN_RUNTIME_NO_MALLOC +#include "main.h" +#include + +#define SVD_DEFAULT(M) JacobiSVD +#define SVD_FOR_MIN_NORM(M) JacobiSVD +#include "svd_common.h" + +// Check all variants of JacobiSVD +template +void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true) +{ + MatrixType m = a; + if(pickrandom) + svd_fill_random(m); + + CALL_SUBTEST(( svd_test_all_computation_options >(m, true) )); // check full only + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); + if(m.rows()==m.cols()) + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); +} + +template void jacobisvd_verify_assert(const MatrixType& m) +{ + svd_verify_assert >(m); + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + + MatrixType a = MatrixType::Zero(rows, cols); + a.setZero(); + + if (ColsAtCompileTime == Dynamic) + { + JacobiSVD svd_fullqr; + VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV)) + VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV)) + VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV)) + } +} + +template +void jacobisvd_method() +{ + enum { Size = MatrixType::RowsAtCompileTime 
}; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix RealVecType; + MatrixType m = MatrixType::Identity(); + VERIFY_IS_APPROX(m.jacobiSvd().singularValues(), RealVecType::Ones()); + VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixU()); + VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixV()); + VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m); +} + +void test_jacobisvd() +{ + CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) )); + CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) )); + CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) )); + CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) )); + + CALL_SUBTEST_11(svd_all_trivial_2x2(jacobisvd)); + CALL_SUBTEST_12(svd_all_trivial_2x2(jacobisvd)); + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_3(( jacobisvd() )); + CALL_SUBTEST_4(( jacobisvd() )); + CALL_SUBTEST_5(( jacobisvd >() )); + CALL_SUBTEST_6(( jacobisvd >(Matrix(10,2)) )); + + int r = internal::random(1, 30), + c = internal::random(1, 30); + + TEST_SET_BUT_UNUSED_VARIABLE(r) + TEST_SET_BUT_UNUSED_VARIABLE(c) + + CALL_SUBTEST_10(( jacobisvd(MatrixXd(r,c)) )); + CALL_SUBTEST_7(( jacobisvd(MatrixXf(r,c)) )); + CALL_SUBTEST_8(( jacobisvd(MatrixXcd(r,c)) )); + (void) r; + (void) c; + + // Test on inf/nan matrix + CALL_SUBTEST_7( (svd_inf_nan, MatrixXf>()) ); + CALL_SUBTEST_10( (svd_inf_nan, MatrixXd>()) ); + } + + CALL_SUBTEST_7(( jacobisvd(MatrixXf(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + CALL_SUBTEST_8(( jacobisvd(MatrixXcd(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) )); + + // test matrixbase method + CALL_SUBTEST_1(( jacobisvd_method() )); + CALL_SUBTEST_3(( jacobisvd_method() )); + + // Test problem size constructors + CALL_SUBTEST_7( JacobiSVD(10,10) ); + + // Check that preallocation avoids subsequent mallocs + CALL_SUBTEST_9( 
svd_preallocate() ); + + CALL_SUBTEST_2( svd_underoverflow() ); +} diff --git a/thirdparty/eigen/test/linearstructure.cpp b/thirdparty/eigen/test/linearstructure.cpp new file mode 100644 index 000000000..17474af10 --- /dev/null +++ b/thirdparty/eigen/test/linearstructure.cpp @@ -0,0 +1,149 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + +#include "main.h" + +template void linearStructure(const MatrixType& m) +{ + using std::abs; + /* this test covers the following files: + CwiseUnaryOp.h, CwiseBinaryOp.h, SelfCwiseBinaryOp.h + */ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + // this test relies a lot on Random.h, and there's not much more that we can do + // to test it, hence I consider that we will have tested Random.h + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols); + + Scalar s1 = internal::random(); + while (abs(s1)(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + VERIFY_IS_APPROX(-(-m1), m1); + VERIFY_IS_APPROX(m1+m1, 2*m1); + VERIFY_IS_APPROX(m1+m2-m1, m2); + VERIFY_IS_APPROX(-m2+m1+m2, m1); + VERIFY_IS_APPROX(m1*s1, s1*m1); + VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2); + VERIFY_IS_APPROX((-m1+m2)*s1, -s1*m1+s1*m2); + m3 = m2; m3 += m1; + VERIFY_IS_APPROX(m3, m1+m2); + m3 = m2; m3 -= m1; + VERIFY_IS_APPROX(m3, m2-m1); + m3 = m2; m3 *= s1; + VERIFY_IS_APPROX(m3, s1*m2); + 
if(!NumTraits::IsInteger) + { + m3 = m2; m3 /= s1; + VERIFY_IS_APPROX(m3, m2/s1); + } + + // again, test operator() to check const-qualification + VERIFY_IS_APPROX((-m1)(r,c), -(m1(r,c))); + VERIFY_IS_APPROX((m1-m2)(r,c), (m1(r,c))-(m2(r,c))); + VERIFY_IS_APPROX((m1+m2)(r,c), (m1(r,c))+(m2(r,c))); + VERIFY_IS_APPROX((s1*m1)(r,c), s1*(m1(r,c))); + VERIFY_IS_APPROX((m1*s1)(r,c), (m1(r,c))*s1); + if(!NumTraits::IsInteger) + VERIFY_IS_APPROX((m1/s1)(r,c), (m1(r,c))/s1); + + // use .block to disable vectorization and compare to the vectorized version + VERIFY_IS_APPROX(m1+m1.block(0,0,rows,cols), m1+m1); + VERIFY_IS_APPROX(m1.cwiseProduct(m1.block(0,0,rows,cols)), m1.cwiseProduct(m1)); + VERIFY_IS_APPROX(m1 - m1.block(0,0,rows,cols), m1 - m1); + VERIFY_IS_APPROX(m1.block(0,0,rows,cols) * s1, m1 * s1); +} + +// Make sure that complex * real and real * complex are properly optimized +template void real_complex(DenseIndex rows = MatrixType::RowsAtCompileTime, DenseIndex cols = MatrixType::ColsAtCompileTime) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + RealScalar s = internal::random(); + MatrixType m1 = MatrixType::Random(rows, cols); + + g_called = false; + VERIFY_IS_APPROX(s*m1, Scalar(s)*m1); + VERIFY(g_called && "real * matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1*s, m1*Scalar(s)); + VERIFY(g_called && "matrix * real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); + VERIFY(g_called && "matrix / real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array()); + VERIFY(g_called && "real + matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s)); + VERIFY(g_called && "matrix + real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array()); + VERIFY(g_called && "real - matrix not properly optimized"); 
+ + g_called = false; + VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s)); + VERIFY(g_called && "matrix - real not properly optimized"); +} + +void test_linearstructure() +{ + g_called = true; + VERIFY(g_called); // avoid `unneeded-internal-declaration` warning. + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( linearStructure(Matrix()) ); + CALL_SUBTEST_2( linearStructure(Matrix2f()) ); + CALL_SUBTEST_3( linearStructure(Vector3d()) ); + CALL_SUBTEST_4( linearStructure(Matrix4d()) ); + CALL_SUBTEST_5( linearStructure(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_6( linearStructure(MatrixXf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( linearStructure(MatrixXi (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_8( linearStructure(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_9( linearStructure(ArrayXXf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_10( linearStructure(ArrayXXcf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + + CALL_SUBTEST_11( real_complex() ); + CALL_SUBTEST_11( real_complex(10,10) ); + CALL_SUBTEST_11( real_complex(10,10) ); + } + +#ifdef EIGEN_TEST_PART_4 + { + // make sure that /=scalar and /scalar do not overflow + // rational: 1.0/4.94e-320 overflow, but m/4.94e-320 should not + Matrix4d m2, m3; + m3 = m2 = Matrix4d::Random()*1e-20; + m2 = m2 / 4.9e-320; + VERIFY_IS_APPROX(m2.cwiseQuotient(m2), Matrix4d::Ones()); + m3 /= 4.9e-320; + VERIFY_IS_APPROX(m3.cwiseQuotient(m3), Matrix4d::Ones()); + + + } +#endif +} diff --git a/thirdparty/eigen/test/lscg.cpp b/thirdparty/eigen/test/lscg.cpp new file mode 100644 index 000000000..daa62a954 --- /dev/null +++ b/thirdparty/eigen/test/lscg.cpp @@ -0,0 +1,29 @@ 
+// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" +#include + +template void test_lscg_T() +{ + LeastSquaresConjugateGradient > lscg_colmajor_diag; + LeastSquaresConjugateGradient, IdentityPreconditioner> lscg_colmajor_I; + + CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_diag) ); + CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_I) ); + + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_diag) ); + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_I) ); +} + +void test_lscg() +{ + CALL_SUBTEST_1(test_lscg_T()); + CALL_SUBTEST_2(test_lscg_T >()); +} diff --git a/thirdparty/eigen/test/lu.cpp b/thirdparty/eigen/test/lu.cpp new file mode 100644 index 000000000..9787f4d86 --- /dev/null +++ b/thirdparty/eigen/test/lu.cpp @@ -0,0 +1,281 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +using namespace std; + +template +typename MatrixType::RealScalar matrix_l1_norm(const MatrixType& m) { + return m.cwiseAbs().colwise().sum().maxCoeff(); +} + +template void lu_non_invertible() +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::RealScalar RealScalar; + /* this test covers the following files: + LU.h + */ + Index rows, cols, cols2; + if(MatrixType::RowsAtCompileTime==Dynamic) + { + rows = internal::random(2,EIGEN_TEST_MAX_SIZE); + } + else + { + rows = MatrixType::RowsAtCompileTime; + } + if(MatrixType::ColsAtCompileTime==Dynamic) + { + cols = internal::random(2,EIGEN_TEST_MAX_SIZE); + cols2 = internal::random(2,EIGEN_TEST_MAX_SIZE); + } + else + { + cols2 = cols = MatrixType::ColsAtCompileTime; + } + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + typedef typename internal::kernel_retval_base >::ReturnType KernelMatrixType; + typedef typename internal::image_retval_base >::ReturnType ImageMatrixType; + typedef Matrix + CMatrixType; + typedef Matrix + RMatrixType; + + Index rank = internal::random(1, (std::min)(rows, cols)-1); + + // The image of the zero matrix should consist of a single (zero) column vector + VERIFY((MatrixType::Zero(rows,cols).fullPivLu().image(MatrixType::Zero(rows,cols)).cols() == 1)); + + MatrixType m1(rows, cols), m3(rows, cols2); + CMatrixType m2(cols, cols2); + createRandomPIMatrixOfRank(rank, rows, cols, m1); + + FullPivLU lu; + + // The special value 0.01 below works well in tests. Keep in mind that we're only computing the rank + // of singular values are either 0 or 1. + // So it's not clear at all that the epsilon should play any role there. 
+ lu.setThreshold(RealScalar(0.01)); + lu.compute(m1); + + MatrixType u(rows,cols); + u = lu.matrixLU().template triangularView(); + RMatrixType l = RMatrixType::Identity(rows,rows); + l.block(0,0,rows,(std::min)(rows,cols)).template triangularView() + = lu.matrixLU().block(0,0,rows,(std::min)(rows,cols)); + + VERIFY_IS_APPROX(lu.permutationP() * m1 * lu.permutationQ(), l*u); + + KernelMatrixType m1kernel = lu.kernel(); + ImageMatrixType m1image = lu.image(m1); + + VERIFY_IS_APPROX(m1, lu.reconstructedMatrix()); + VERIFY(rank == lu.rank()); + VERIFY(cols - lu.rank() == lu.dimensionOfKernel()); + VERIFY(!lu.isInjective()); + VERIFY(!lu.isInvertible()); + VERIFY(!lu.isSurjective()); + VERIFY((m1 * m1kernel).isMuchSmallerThan(m1)); + VERIFY(m1image.fullPivLu().rank() == rank); + VERIFY_IS_APPROX(m1 * m1.adjoint() * m1image, m1image); + + m2 = CMatrixType::Random(cols,cols2); + m3 = m1*m2; + m2 = CMatrixType::Random(cols,cols2); + // test that the code, which does resize(), may be applied to an xpr + m2.block(0,0,m2.rows(),m2.cols()) = lu.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + + // test solve with transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.transpose()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.adjoint()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); +} + +template void lu_invertible() +{ + /* this test covers the following files: + LU.h + */ + typedef typename NumTraits::Real RealScalar; + Index size = MatrixType::RowsAtCompileTime; + if( size==Dynamic) + size = 
internal::random(1,EIGEN_TEST_MAX_SIZE); + + MatrixType m1(size, size), m2(size, size), m3(size, size); + FullPivLU lu; + lu.setThreshold(RealScalar(0.01)); + do { + m1 = MatrixType::Random(size,size); + lu.compute(m1); + } while(!lu.isInvertible()); + + VERIFY_IS_APPROX(m1, lu.reconstructedMatrix()); + VERIFY(0 == lu.dimensionOfKernel()); + VERIFY(lu.kernel().cols() == 1); // the kernel() should consist of a single (zero) column vector + VERIFY(size == lu.rank()); + VERIFY(lu.isInjective()); + VERIFY(lu.isSurjective()); + VERIFY(lu.isInvertible()); + VERIFY(lu.image(m1).fullPivLu().isInvertible()); + m3 = MatrixType::Random(size,size); + m2 = lu.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + MatrixType m1_inverse = lu.inverse(); + VERIFY_IS_APPROX(m2, m1_inverse*m3); + + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(m1)) / matrix_l1_norm(m1_inverse); + const RealScalar rcond_est = lu.rcond(); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + // test solve with transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); + + // Regression test for Bug 302 + MatrixType m4 = MatrixType::Random(size,size); + VERIFY_IS_APPROX(lu.solve(m3*m4), lu.solve(m3)*m4); +} + +template void lu_partial_piv() +{ + /* this test covers the following files: + PartialPivLU.h + */ + typedef typename MatrixType::Index Index; + typedef typename NumTraits::Real RealScalar; + Index size = internal::random(1,4); + + MatrixType m1(size, size), m2(size, size), m3(size, size); + m1.setRandom(); + 
PartialPivLU plu(m1); + + VERIFY_IS_APPROX(m1, plu.reconstructedMatrix()); + + m3 = MatrixType::Random(size,size); + m2 = plu.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + MatrixType m1_inverse = plu.inverse(); + VERIFY_IS_APPROX(m2, m1_inverse*m3); + + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(m1)) / matrix_l1_norm(m1_inverse); + const RealScalar rcond_est = plu.rcond(); + // Verify that the estimate is within a factor of 10 of the truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + // test solve with transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); +} + +template void lu_verify_assert() +{ + MatrixType tmp; + + FullPivLU lu; + VERIFY_RAISES_ASSERT(lu.matrixLU()) + VERIFY_RAISES_ASSERT(lu.permutationP()) + VERIFY_RAISES_ASSERT(lu.permutationQ()) + VERIFY_RAISES_ASSERT(lu.kernel()) + VERIFY_RAISES_ASSERT(lu.image(tmp)) + VERIFY_RAISES_ASSERT(lu.solve(tmp)) + VERIFY_RAISES_ASSERT(lu.determinant()) + VERIFY_RAISES_ASSERT(lu.rank()) + VERIFY_RAISES_ASSERT(lu.dimensionOfKernel()) + VERIFY_RAISES_ASSERT(lu.isInjective()) + VERIFY_RAISES_ASSERT(lu.isSurjective()) + VERIFY_RAISES_ASSERT(lu.isInvertible()) + VERIFY_RAISES_ASSERT(lu.inverse()) + + PartialPivLU plu; + VERIFY_RAISES_ASSERT(plu.matrixLU()) + VERIFY_RAISES_ASSERT(plu.permutationP()) + VERIFY_RAISES_ASSERT(plu.solve(tmp)) + VERIFY_RAISES_ASSERT(plu.determinant()) + VERIFY_RAISES_ASSERT(plu.inverse()) +} + +void test_lu() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( lu_non_invertible() ); + CALL_SUBTEST_1( lu_invertible() ); + CALL_SUBTEST_1( lu_verify_assert() ); + + 
CALL_SUBTEST_2( (lu_non_invertible >()) ); + CALL_SUBTEST_2( (lu_verify_assert >()) ); + + CALL_SUBTEST_3( lu_non_invertible() ); + CALL_SUBTEST_3( lu_invertible() ); + CALL_SUBTEST_3( lu_verify_assert() ); + + CALL_SUBTEST_4( lu_non_invertible() ); + CALL_SUBTEST_4( lu_invertible() ); + CALL_SUBTEST_4( lu_partial_piv() ); + CALL_SUBTEST_4( lu_verify_assert() ); + + CALL_SUBTEST_5( lu_non_invertible() ); + CALL_SUBTEST_5( lu_invertible() ); + CALL_SUBTEST_5( lu_verify_assert() ); + + CALL_SUBTEST_6( lu_non_invertible() ); + CALL_SUBTEST_6( lu_invertible() ); + CALL_SUBTEST_6( lu_partial_piv() ); + CALL_SUBTEST_6( lu_verify_assert() ); + + CALL_SUBTEST_7(( lu_non_invertible >() )); + + // Test problem size constructors + CALL_SUBTEST_9( PartialPivLU(10) ); + CALL_SUBTEST_9( FullPivLU(10, 20); ); + } +} diff --git a/thirdparty/eigen/test/main.h b/thirdparty/eigen/test/main.h new file mode 100644 index 000000000..74ff96a23 --- /dev/null +++ b/thirdparty/eigen/test/main.h @@ -0,0 +1,743 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// The following includes of STL headers have to be done _before_ the +// definition of macros min() and max(). The reason is that many STL +// implementations will not work properly as the min and max symbols collide +// with the STL functions std:min() and std::max(). The STL headers may check +// for the macro definition of min/max and issue a warning or undefine the +// macros. 
+// +// Still, Windows defines min() and max() in windef.h as part of the regular +// Windows system interfaces and many other Windows APIs depend on these +// macros being available. To prevent the macro expansion of min/max and to +// make Eigen compatible with the Windows environment all function calls of +// std::min() and std::max() have to be written with parenthesis around the +// function name. +// +// All STL headers used by Eigen should be included here. Because main.h is +// included before any Eigen header and because the STL headers are guarded +// against multiple inclusions, no STL header will see our own min/max macro +// definitions. +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201103L +#include +#ifdef EIGEN_USE_THREADS +#include +#endif +#endif + +// To test that all calls from Eigen code to std::min() and std::max() are +// protected by parenthesis against macro expansion, the min()/max() macros +// are defined here and any not-parenthesized min/max call will cause a +// compiler error. +#define min(A,B) please_protect_your_min_with_parentheses +#define max(A,B) please_protect_your_max_with_parentheses +#define isnan(X) please_protect_your_isnan_with_parentheses +#define isinf(X) please_protect_your_isinf_with_parentheses +#define isfinite(X) please_protect_your_isfinite_with_parentheses +#ifdef M_PI +#undef M_PI +#endif +#define M_PI please_use_EIGEN_PI_instead_of_M_PI + +#define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes +// B0 is defined in POSIX header termios.h +#define B0 FORBIDDEN_IDENTIFIER + +// Unit tests calling Eigen's blas library must preserve the default blocking size +// to avoid troubles. 
+#ifndef EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#define EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS +#endif + +// shuts down ICC's remark #593: variable "XXX" was set but never used +#define TEST_SET_BUT_UNUSED_VARIABLE(X) EIGEN_UNUSED_VARIABLE(X) + +#ifdef TEST_ENABLE_TEMPORARY_TRACKING + +static long int nb_temporaries; + +inline void on_temporary_creation(long int size) { + // here's a great place to set a breakpoint when debugging failures in this test! + if(size!=0) nb_temporaries++; +} + +#define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { on_temporary_creation(size); } + +#define VERIFY_EVALUATION_COUNT(XPR,N) {\ + nb_temporaries = 0; \ + XPR; \ + if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ + VERIFY( (#XPR) && nb_temporaries==N ); \ + } + +#endif + +// the following file is automatically generated by cmake +#include "split_test_helper.h" + +#ifdef NDEBUG +#undef NDEBUG +#endif + +// On windows CE, NDEBUG is automatically defined if NDEBUG is not defined. +#ifndef DEBUG +#define DEBUG +#endif + +// bounds integer values for AltiVec +#if defined(__ALTIVEC__) || defined(__VSX__) +#define EIGEN_MAKING_DOCS +#endif + +#ifndef EIGEN_TEST_FUNC +#error EIGEN_TEST_FUNC must be defined +#endif + +#define DEFAULT_REPEAT 10 + +namespace Eigen +{ + static std::vector g_test_stack; + // level == 0 <=> abort if test fail + // level >= 1 <=> warning message to std::cerr if test fail + static int g_test_level = 0; + static int g_repeat; + static unsigned int g_seed; + static bool g_has_set_repeat, g_has_set_seed; +} + +#define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl +// #define TRACK while() + +#define EI_PP_MAKE_STRING2(S) #S +#define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S) + +#define EIGEN_DEFAULT_IO_FORMAT IOFormat(4, 0, " ", "\n", "", "", "", "") + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) + #define EIGEN_EXCEPTIONS +#endif + +#ifndef EIGEN_NO_ASSERTION_CHECKING + + namespace Eigen + { + static 
const bool should_raise_an_assert = false; + + // Used to avoid to raise two exceptions at a time in which + // case the exception is not properly caught. + // This may happen when a second exceptions is triggered in a destructor. + static bool no_more_assert = false; + static bool report_on_cerr_on_assert_failure = true; + + struct eigen_assert_exception + { + eigen_assert_exception(void) {} + ~eigen_assert_exception() { Eigen::no_more_assert = false; } + }; + } + // If EIGEN_DEBUG_ASSERTS is defined and if no assertion is triggered while + // one should have been, then the list of excecuted assertions is printed out. + // + // EIGEN_DEBUG_ASSERTS is not enabled by default as it + // significantly increases the compilation time + // and might even introduce side effects that would hide + // some memory errors. + #ifdef EIGEN_DEBUG_ASSERTS + + namespace Eigen + { + namespace internal + { + static bool push_assert = false; + } + static std::vector eigen_assert_list; + } + #define eigen_assert(a) \ + if( (!(a)) && (!no_more_assert) ) \ + { \ + if(report_on_cerr_on_assert_failure) \ + std::cerr << #a << " " __FILE__ << "(" << __LINE__ << ")\n"; \ + Eigen::no_more_assert = true; \ + EIGEN_THROW_X(Eigen::eigen_assert_exception()); \ + } \ + else if (Eigen::internal::push_assert) \ + { \ + eigen_assert_list.push_back(std::string(EI_PP_MAKE_STRING(__FILE__) " (" EI_PP_MAKE_STRING(__LINE__) ") : " #a) ); \ + } + + #ifdef EIGEN_EXCEPTIONS + #define VERIFY_RAISES_ASSERT(a) \ + { \ + Eigen::no_more_assert = false; \ + Eigen::eigen_assert_list.clear(); \ + Eigen::internal::push_assert = true; \ + Eigen::report_on_cerr_on_assert_failure = false; \ + try { \ + a; \ + std::cerr << "One of the following asserts should have been triggered:\n"; \ + for (uint ai=0 ; ai // required for createRandomPIMatrixOfRank + +inline void verify_impl(bool condition, const char *testname, const char *file, int line, const char *condition_as_string) +{ + if (!condition) + { + 
if(Eigen::g_test_level>0) + std::cerr << "WARNING: "; + std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")" + << std::endl << " " << condition_as_string << std::endl; + std::cerr << "Stack:\n"; + const int test_stack_size = static_cast(Eigen::g_test_stack.size()); + for(int i=test_stack_size-1; i>=0; --i) + std::cerr << " - " << Eigen::g_test_stack[i] << "\n"; + std::cerr << "\n"; + if(Eigen::g_test_level==0) + abort(); + } +} + +#define VERIFY(a) ::verify_impl(a, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a)) + +#define VERIFY_GE(a, b) ::verify_impl(a >= b, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a >= b)) +#define VERIFY_LE(a, b) ::verify_impl(a <= b, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a <= b)) + + +#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, b, true)) +#define VERIFY_IS_NOT_EQUAL(a, b) VERIFY(test_is_equal(a, b, false)) +#define VERIFY_IS_APPROX(a, b) VERIFY(verifyIsApprox(a, b)) +#define VERIFY_IS_NOT_APPROX(a, b) VERIFY(!test_isApprox(a, b)) +#define VERIFY_IS_MUCH_SMALLER_THAN(a, b) VERIFY(test_isMuchSmallerThan(a, b)) +#define VERIFY_IS_NOT_MUCH_SMALLER_THAN(a, b) VERIFY(!test_isMuchSmallerThan(a, b)) +#define VERIFY_IS_APPROX_OR_LESS_THAN(a, b) VERIFY(test_isApproxOrLessThan(a, b)) +#define VERIFY_IS_NOT_APPROX_OR_LESS_THAN(a, b) VERIFY(!test_isApproxOrLessThan(a, b)) + +#define VERIFY_IS_UNITARY(a) VERIFY(test_isUnitary(a)) + +#define CALL_SUBTEST(FUNC) do { \ + g_test_stack.push_back(EI_PP_MAKE_STRING(FUNC)); \ + FUNC; \ + g_test_stack.pop_back(); \ + } while (0) + + +namespace Eigen { + +template inline typename NumTraits::Real test_precision() { return NumTraits::dummy_precision(); } +template<> inline float test_precision() { return 1e-3f; } +template<> inline double test_precision() { return 1e-6; } +template<> inline long double test_precision() { return 1e-6l; } +template<> inline float test_precision >() { return 
test_precision(); } +template<> inline double test_precision >() { return test_precision(); } +template<> inline long double test_precision >() { return test_precision(); } + +inline bool test_isApprox(const int& a, const int& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isMuchSmallerThan(const int& a, const int& b) +{ return internal::isMuchSmallerThan(a, b, test_precision()); } +inline bool test_isApproxOrLessThan(const int& a, const int& b) +{ return internal::isApproxOrLessThan(a, b, test_precision()); } + +inline bool test_isApprox(const float& a, const float& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isMuchSmallerThan(const float& a, const float& b) +{ return internal::isMuchSmallerThan(a, b, test_precision()); } +inline bool test_isApproxOrLessThan(const float& a, const float& b) +{ return internal::isApproxOrLessThan(a, b, test_precision()); } + +inline bool test_isApprox(const double& a, const double& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isMuchSmallerThan(const double& a, const double& b) +{ return internal::isMuchSmallerThan(a, b, test_precision()); } +inline bool test_isApproxOrLessThan(const double& a, const double& b) +{ return internal::isApproxOrLessThan(a, b, test_precision()); } + +#ifndef EIGEN_TEST_NO_COMPLEX +inline bool test_isApprox(const std::complex& a, const std::complex& b) +{ return internal::isApprox(a, b, test_precision >()); } +inline bool test_isMuchSmallerThan(const std::complex& a, const std::complex& b) +{ return internal::isMuchSmallerThan(a, b, test_precision >()); } + +inline bool test_isApprox(const std::complex& a, const std::complex& b) +{ return internal::isApprox(a, b, test_precision >()); } +inline bool test_isMuchSmallerThan(const std::complex& a, const std::complex& b) +{ return internal::isMuchSmallerThan(a, b, test_precision >()); } + +#ifndef EIGEN_TEST_NO_LONGDOUBLE +inline bool test_isApprox(const std::complex& 
a, const std::complex& b) +{ return internal::isApprox(a, b, test_precision >()); } +inline bool test_isMuchSmallerThan(const std::complex& a, const std::complex& b) +{ return internal::isMuchSmallerThan(a, b, test_precision >()); } +#endif +#endif + +#ifndef EIGEN_TEST_NO_LONGDOUBLE +inline bool test_isApprox(const long double& a, const long double& b) +{ + bool ret = internal::isApprox(a, b, test_precision()); + if (!ret) std::cerr + << std::endl << " actual = " << a + << std::endl << " expected = " << b << std::endl << std::endl; + return ret; +} + +inline bool test_isMuchSmallerThan(const long double& a, const long double& b) +{ return internal::isMuchSmallerThan(a, b, test_precision()); } +inline bool test_isApproxOrLessThan(const long double& a, const long double& b) +{ return internal::isApproxOrLessThan(a, b, test_precision()); } +#endif // EIGEN_TEST_NO_LONGDOUBLE + +inline bool test_isApprox(const half& a, const half& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isMuchSmallerThan(const half& a, const half& b) +{ return internal::isMuchSmallerThan(a, b, test_precision()); } +inline bool test_isApproxOrLessThan(const half& a, const half& b) +{ return internal::isApproxOrLessThan(a, b, test_precision()); } + +// test_relative_error returns the relative difference between a and b as a real scalar as used in isApprox. 
+template +typename T1::RealScalar test_relative_error(const EigenBase &a, const EigenBase &b) +{ + using std::sqrt; + typedef typename T1::RealScalar RealScalar; + typename internal::nested_eval::type ea(a.derived()); + typename internal::nested_eval::type eb(b.derived()); + return sqrt(RealScalar((ea-eb).cwiseAbs2().sum()) / RealScalar((std::min)(eb.cwiseAbs2().sum(),ea.cwiseAbs2().sum()))); +} + +template +typename T1::RealScalar test_relative_error(const T1 &a, const T2 &b, const typename T1::Coefficients* = 0) +{ + return test_relative_error(a.coeffs(), b.coeffs()); +} + +template +typename T1::Scalar test_relative_error(const T1 &a, const T2 &b, const typename T1::MatrixType* = 0) +{ + return test_relative_error(a.matrix(), b.matrix()); +} + +template +S test_relative_error(const Translation &a, const Translation &b) +{ + return test_relative_error(a.vector(), b.vector()); +} + +template +S test_relative_error(const ParametrizedLine &a, const ParametrizedLine &b) +{ + return (std::max)(test_relative_error(a.origin(), b.origin()), test_relative_error(a.direction(), b.direction())); +} + +template +S test_relative_error(const AlignedBox &a, const AlignedBox &b) +{ + return (std::max)(test_relative_error((a.min)(), (b.min)()), test_relative_error((a.max)(), (b.max)())); +} + +template class SparseMatrixBase; +template +typename T1::RealScalar test_relative_error(const MatrixBase &a, const SparseMatrixBase &b) +{ + return test_relative_error(a,b.toDense()); +} + +template class SparseMatrixBase; +template +typename T1::RealScalar test_relative_error(const SparseMatrixBase &a, const MatrixBase &b) +{ + return test_relative_error(a.toDense(),b); +} + +template class SparseMatrixBase; +template +typename T1::RealScalar test_relative_error(const SparseMatrixBase &a, const SparseMatrixBase &b) +{ + return test_relative_error(a.toDense(),b.toDense()); +} + +template +typename NumTraits::Real test_relative_error(const T1 &a, const T2 &b, typename 
internal::enable_if::Real>::value, T1>::type* = 0) +{ + typedef typename NumTraits::Real RealScalar; + return numext::sqrt(RealScalar(numext::abs2(a-b))/RealScalar((numext::mini)(numext::abs2(a),numext::abs2(b)))); +} + +template +T test_relative_error(const Rotation2D &a, const Rotation2D &b) +{ + return test_relative_error(a.angle(), b.angle()); +} + +template +T test_relative_error(const AngleAxis &a, const AngleAxis &b) +{ + return (std::max)(test_relative_error(a.angle(), b.angle()), test_relative_error(a.axis(), b.axis())); +} + +template +inline bool test_isApprox(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only +{ + return a.isApprox(b, test_precision()); +} + +// get_test_precision is a small wrapper to test_precision allowing to return the scalar precision for either scalars or expressions +template +typename NumTraits::Real get_test_precision(const T&, const typename T::Scalar* = 0) +{ + return test_precision::Real>(); +} + +template +typename NumTraits::Real get_test_precision(const T&,typename internal::enable_if::Real>::value, T>::type* = 0) +{ + return test_precision::Real>(); +} + +// verifyIsApprox is a wrapper to test_isApprox that outputs the relative difference magnitude if the test fails. +template +inline bool verifyIsApprox(const Type1& a, const Type2& b) +{ + bool ret = test_isApprox(a,b); + if(!ret) + { + std::cerr << "Difference too large wrt tolerance " << get_test_precision(a) << ", relative error is: " << test_relative_error(a,b) << std::endl; + } + return ret; +} + +// The idea behind this function is to compare the two scalars a and b where +// the scalar ref is a hint about the expected order of magnitude of a and b. +// WARNING: the scalar a and b must be positive +// Therefore, if for some reason a and b are very small compared to ref, +// we won't issue a false negative. 
+// This test could be: abs(a-b) <= eps * ref +// However, it seems that simply comparing a+ref and b+ref is more sensitive to true error. +template +inline bool test_isApproxWithRef(const Scalar& a, const Scalar& b, const ScalarRef& ref) +{ + return test_isApprox(a+ref, b+ref); +} + +template +inline bool test_isMuchSmallerThan(const MatrixBase& m1, + const MatrixBase& m2) +{ + return m1.isMuchSmallerThan(m2, test_precision::Scalar>()); +} + +template +inline bool test_isMuchSmallerThan(const MatrixBase& m, + const typename NumTraits::Scalar>::Real& s) +{ + return m.isMuchSmallerThan(s, test_precision::Scalar>()); +} + +template +inline bool test_isUnitary(const MatrixBase& m) +{ + return m.isUnitary(test_precision::Scalar>()); +} + +// Forward declaration to avoid ICC warning +template +bool test_is_equal(const T& actual, const U& expected, bool expect_equal=true); + +template +bool test_is_equal(const T& actual, const U& expected, bool expect_equal) +{ + if ((actual==expected) == expect_equal) + return true; + // false: + std::cerr + << "\n actual = " << actual + << "\n expected " << (expect_equal ? "= " : "!=") << expected << "\n\n"; + return false; +} + +/** Creates a random Partial Isometry matrix of given rank. + * + * A partial isometry is a matrix all of whose singular values are either 0 or 1. + * This is very useful to test rank-revealing algorithms. 
+ */ +// Forward declaration to avoid ICC warning +template +void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m); +template +void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m) +{ + typedef typename internal::traits::Scalar Scalar; + enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; + + typedef Matrix VectorType; + typedef Matrix MatrixAType; + typedef Matrix MatrixBType; + + if(desired_rank == 0) + { + m.setZero(rows,cols); + return; + } + + if(desired_rank == 1) + { + // here we normalize the vectors to get a partial isometry + m = VectorType::Random(rows).normalized() * VectorType::Random(cols).normalized().transpose(); + return; + } + + MatrixAType a = MatrixAType::Random(rows,rows); + MatrixType d = MatrixType::Identity(rows,cols); + MatrixBType b = MatrixBType::Random(cols,cols); + + // set the diagonal such that only desired_rank non-zero entries reamain + const Index diag_size = (std::min)(d.rows(),d.cols()); + if(diag_size != desired_rank) + d.diagonal().segment(desired_rank, diag_size-desired_rank) = VectorType::Zero(diag_size-desired_rank); + + HouseholderQR qra(a); + HouseholderQR qrb(b); + m = qra.householderQ() * d * qrb.householderQ(); +} + +// Forward declaration to avoid ICC warning +template +void randomPermutationVector(PermutationVectorType& v, Index size); +template +void randomPermutationVector(PermutationVectorType& v, Index size) +{ + typedef typename PermutationVectorType::Scalar Scalar; + v.resize(size); + for(Index i = 0; i < size; ++i) v(i) = Scalar(i); + if(size == 1) return; + for(Index n = 0; n < 3 * size; ++n) + { + Index i = internal::random(0, size-1); + Index j; + do j = internal::random(0, size-1); while(j==i); + std::swap(v(i), v(j)); + } +} + +template bool isNotNaN(const T& x) +{ + return x==x; +} + +template bool isPlusInf(const T& x) +{ + return x > NumTraits::highest(); +} + +template bool isMinusInf(const T& x) 
+{ + return x < NumTraits::lowest(); +} + +} // end namespace Eigen + +template struct GetDifferentType; + +template<> struct GetDifferentType { typedef double type; }; +template<> struct GetDifferentType { typedef float type; }; +template struct GetDifferentType > +{ typedef std::complex::type> type; }; + +// Forward declaration to avoid ICC warning +template std::string type_name(); +template std::string type_name() { return "other"; } +template<> std::string type_name() { return "float"; } +template<> std::string type_name() { return "double"; } +template<> std::string type_name() { return "long double"; } +template<> std::string type_name() { return "int"; } +template<> std::string type_name >() { return "complex"; } +template<> std::string type_name >() { return "complex"; } +template<> std::string type_name >() { return "complex"; } +template<> std::string type_name >() { return "complex"; } + +// forward declaration of the main test function +void EIGEN_CAT(test_,EIGEN_TEST_FUNC)(); + +using namespace Eigen; + +inline void set_repeat_from_string(const char *str) +{ + errno = 0; + g_repeat = int(strtoul(str, 0, 10)); + if(errno || g_repeat <= 0) + { + std::cout << "Invalid repeat value " << str << std::endl; + exit(EXIT_FAILURE); + } + g_has_set_repeat = true; +} + +inline void set_seed_from_string(const char *str) +{ + errno = 0; + g_seed = int(strtoul(str, 0, 10)); + if(errno || g_seed == 0) + { + std::cout << "Invalid seed value " << str << std::endl; + exit(EXIT_FAILURE); + } + g_has_set_seed = true; +} + +int main(int argc, char *argv[]) +{ + g_has_set_repeat = false; + g_has_set_seed = false; + bool need_help = false; + + for(int i = 1; i < argc; i++) + { + if(argv[i][0] == 'r') + { + if(g_has_set_repeat) + { + std::cout << "Argument " << argv[i] << " conflicting with a former argument" << std::endl; + return 1; + } + set_repeat_from_string(argv[i]+1); + } + else if(argv[i][0] == 's') + { + if(g_has_set_seed) + { + std::cout << "Argument " << argv[i] << 
" conflicting with a former argument" << std::endl; + return 1; + } + set_seed_from_string(argv[i]+1); + } + else + { + need_help = true; + } + } + + if(need_help) + { + std::cout << "This test application takes the following optional arguments:" << std::endl; + std::cout << " rN Repeat each test N times (default: " << DEFAULT_REPEAT << ")" << std::endl; + std::cout << " sN Use N as seed for random numbers (default: based on current time)" << std::endl; + std::cout << std::endl; + std::cout << "If defined, the environment variables EIGEN_REPEAT and EIGEN_SEED" << std::endl; + std::cout << "will be used as default values for these parameters." << std::endl; + return 1; + } + + char *env_EIGEN_REPEAT = getenv("EIGEN_REPEAT"); + if(!g_has_set_repeat && env_EIGEN_REPEAT) + set_repeat_from_string(env_EIGEN_REPEAT); + char *env_EIGEN_SEED = getenv("EIGEN_SEED"); + if(!g_has_set_seed && env_EIGEN_SEED) + set_seed_from_string(env_EIGEN_SEED); + + if(!g_has_set_seed) g_seed = (unsigned int) time(NULL); + if(!g_has_set_repeat) g_repeat = DEFAULT_REPEAT; + + std::cout << "Initializing random number generator with seed " << g_seed << std::endl; + std::stringstream ss; + ss << "Seed: " << g_seed; + g_test_stack.push_back(ss.str()); + srand(g_seed); + std::cout << "Repeating each test " << g_repeat << " times" << std::endl; + + Eigen::g_test_stack.push_back(std::string(EI_PP_MAKE_STRING(EIGEN_TEST_FUNC))); + + EIGEN_CAT(test_,EIGEN_TEST_FUNC)(); + return 0; +} + +// These warning are disabled here such that they are still ON when parsing Eigen's header files. +#if defined __INTEL_COMPILER + // remark #383: value copied to temporary, reference to temporary used + // -> this warning is raised even for legal usage as: g_test_stack.push_back("foo"); where g_test_stack is a std::vector + // remark #1418: external function definition with no prior declaration + // -> this warning is raised for all our test functions. Declaring them static would fix the issue. 
+ // warning #279: controlling expression is constant + // remark #1572: floating-point equality and inequality comparisons are unreliable + #pragma warning disable 279 383 1418 1572 +#endif + +#ifdef _MSC_VER + // 4503 - decorated name length exceeded, name was truncated + #pragma warning( disable : 4503) +#endif diff --git a/thirdparty/eigen/test/mapped_matrix.cpp b/thirdparty/eigen/test/mapped_matrix.cpp new file mode 100644 index 000000000..6a84c5897 --- /dev/null +++ b/thirdparty/eigen/test/mapped_matrix.cpp @@ -0,0 +1,211 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NO_STATIC_ASSERT +#define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them +#endif + +#include "main.h" + +#define EIGEN_TESTMAP_MAX_SIZE 256 + +template void map_class_vector(const VectorType& m) +{ + typedef typename VectorType::Index Index; + typedef typename VectorType::Scalar Scalar; + + Index size = m.size(); + + Scalar* array1 = internal::aligned_new(size); + Scalar* array2 = internal::aligned_new(size); + Scalar* array3 = new Scalar[size+1]; + Scalar* array3unaligned = (internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? 
array3+1 : array3; + Scalar array4[EIGEN_TESTMAP_MAX_SIZE]; + + Map(array1, size) = VectorType::Random(size); + Map(array2, size) = Map(array1, size); + Map(array3unaligned, size) = Map(array1, size); + Map(array4, size) = Map(array1, size); + VectorType ma1 = Map(array1, size); + VectorType ma2 = Map(array2, size); + VectorType ma3 = Map(array3unaligned, size); + VectorType ma4 = Map(array4, size); + VERIFY_IS_EQUAL(ma1, ma2); + VERIFY_IS_EQUAL(ma1, ma3); + VERIFY_IS_EQUAL(ma1, ma4); + #ifdef EIGEN_VECTORIZE + if(internal::packet_traits::Vectorizable && size>=AlignedMax) + VERIFY_RAISES_ASSERT((Map(array3unaligned, size))) + #endif + + internal::aligned_delete(array1, size); + internal::aligned_delete(array2, size); + delete[] array3; +} + +template void map_class_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = m.rows(), cols = m.cols(), size = rows*cols; + Scalar s1 = internal::random(); + + // array1 and array2 -> aligned heap allocation + Scalar* array1 = internal::aligned_new(size); + for(int i = 0; i < size; i++) array1[i] = Scalar(1); + Scalar* array2 = internal::aligned_new(size); + for(int i = 0; i < size; i++) array2[i] = Scalar(1); + // array3unaligned -> unaligned pointer to heap + Scalar* array3 = new Scalar[size+1]; + for(int i = 0; i < size+1; i++) array3[i] = Scalar(1); + Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? 
array3+1 : array3; + Scalar array4[256]; + if(size<=256) + for(int i = 0; i < size; i++) array4[i] = Scalar(1); + + Map map1(array1, rows, cols); + Map map2(array2, rows, cols); + Map map3(array3unaligned, rows, cols); + Map map4(array4, rows, cols); + + VERIFY_IS_EQUAL(map1, MatrixType::Ones(rows,cols)); + VERIFY_IS_EQUAL(map2, MatrixType::Ones(rows,cols)); + VERIFY_IS_EQUAL(map3, MatrixType::Ones(rows,cols)); + map1 = MatrixType::Random(rows,cols); + map2 = map1; + map3 = map1; + MatrixType ma1 = map1; + MatrixType ma2 = map2; + MatrixType ma3 = map3; + VERIFY_IS_EQUAL(map1, map2); + VERIFY_IS_EQUAL(map1, map3); + VERIFY_IS_EQUAL(ma1, ma2); + VERIFY_IS_EQUAL(ma1, ma3); + VERIFY_IS_EQUAL(ma1, map3); + + VERIFY_IS_APPROX(s1*map1, s1*map2); + VERIFY_IS_APPROX(s1*ma1, s1*ma2); + VERIFY_IS_EQUAL(s1*ma1, s1*ma3); + VERIFY_IS_APPROX(s1*map1, s1*map3); + + map2 *= s1; + map3 *= s1; + VERIFY_IS_APPROX(s1*map1, map2); + VERIFY_IS_APPROX(s1*map1, map3); + + if(size<=256) + { + VERIFY_IS_EQUAL(map4, MatrixType::Ones(rows,cols)); + map4 = map1; + MatrixType ma4 = map4; + VERIFY_IS_EQUAL(map1, map4); + VERIFY_IS_EQUAL(ma1, map4); + VERIFY_IS_EQUAL(ma1, ma4); + VERIFY_IS_APPROX(s1*map1, s1*map4); + + map4 *= s1; + VERIFY_IS_APPROX(s1*map1, map4); + } + + internal::aligned_delete(array1, size); + internal::aligned_delete(array2, size); + delete[] array3; +} + +template void map_static_methods(const VectorType& m) +{ + typedef typename VectorType::Index Index; + typedef typename VectorType::Scalar Scalar; + + Index size = m.size(); + + Scalar* array1 = internal::aligned_new(size); + Scalar* array2 = internal::aligned_new(size); + Scalar* array3 = new Scalar[size+1]; + Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? 
array3+1 : array3; + + VectorType::MapAligned(array1, size) = VectorType::Random(size); + VectorType::Map(array2, size) = VectorType::Map(array1, size); + VectorType::Map(array3unaligned, size) = VectorType::Map(array1, size); + VectorType ma1 = VectorType::Map(array1, size); + VectorType ma2 = VectorType::MapAligned(array2, size); + VectorType ma3 = VectorType::Map(array3unaligned, size); + VERIFY_IS_EQUAL(ma1, ma2); + VERIFY_IS_EQUAL(ma1, ma3); + + internal::aligned_delete(array1, size); + internal::aligned_delete(array2, size); + delete[] array3; +} + +template void check_const_correctness(const PlainObjectType&) +{ + // there's a lot that we can't test here while still having this test compile! + // the only possible approach would be to run a script trying to compile stuff and checking that it fails. + // CMake can help with that. + + // verify that map-to-const don't have LvalueBit + typedef typename internal::add_const::type ConstPlainObjectType; + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(Map::Flags & LvalueBit) ); + VERIFY( !(Map::Flags & LvalueBit) ); +} + +template +void map_not_aligned_on_scalar() +{ + typedef Matrix MatrixType; + typedef typename MatrixType::Index Index; + Index size = 11; + Scalar* array1 = internal::aligned_new((size+1)*(size+1)+1); + Scalar* array2 = reinterpret_cast(sizeof(Scalar)/2+std::size_t(array1)); + Map > map2(array2, size, size, OuterStride<>(size+1)); + MatrixType m2 = MatrixType::Random(size,size); + map2 = m2; + VERIFY_IS_EQUAL(m2, map2); + + typedef Matrix VectorType; + Map map3(array2, size); + MatrixType v3 = VectorType::Random(size); + map3 = v3; + VERIFY_IS_EQUAL(v3, map3); + + internal::aligned_delete(array1, (size+1)*(size+1)+1); +} + +void test_mapped_matrix() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( map_class_vector(Matrix()) ); + CALL_SUBTEST_1( check_const_correctness(Matrix()) ); + CALL_SUBTEST_2( 
map_class_vector(Vector4d()) ); + CALL_SUBTEST_2( map_class_vector(VectorXd(13)) ); + CALL_SUBTEST_2( check_const_correctness(Matrix4d()) ); + CALL_SUBTEST_3( map_class_vector(RowVector4f()) ); + CALL_SUBTEST_4( map_class_vector(VectorXcf(8)) ); + CALL_SUBTEST_5( map_class_vector(VectorXi(12)) ); + CALL_SUBTEST_5( check_const_correctness(VectorXi(12)) ); + + CALL_SUBTEST_1( map_class_matrix(Matrix()) ); + CALL_SUBTEST_2( map_class_matrix(Matrix4d()) ); + CALL_SUBTEST_11( map_class_matrix(Matrix()) ); + CALL_SUBTEST_4( map_class_matrix(MatrixXcf(internal::random(1,10),internal::random(1,10))) ); + CALL_SUBTEST_5( map_class_matrix(MatrixXi(internal::random(1,10),internal::random(1,10))) ); + + CALL_SUBTEST_6( map_static_methods(Matrix()) ); + CALL_SUBTEST_7( map_static_methods(Vector3f()) ); + CALL_SUBTEST_8( map_static_methods(RowVector3d()) ); + CALL_SUBTEST_9( map_static_methods(VectorXcd(8)) ); + CALL_SUBTEST_10( map_static_methods(VectorXf(12)) ); + + CALL_SUBTEST_11( map_not_aligned_on_scalar() ); + } +} diff --git a/thirdparty/eigen/test/mapstaticmethods.cpp b/thirdparty/eigen/test/mapstaticmethods.cpp new file mode 100644 index 000000000..06272d106 --- /dev/null +++ b/thirdparty/eigen/test/mapstaticmethods.cpp @@ -0,0 +1,175 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +float *ptr; +const float *const_ptr; + +template +struct mapstaticmethods_impl {}; + +template +struct mapstaticmethods_impl +{ + static void run(const PlainObjectType& m) + { + mapstaticmethods_impl::run(m); + + int i = internal::random(2,5), j = internal::random(2,5); + + PlainObjectType::Map(ptr).setZero(); + PlainObjectType::MapAligned(ptr).setZero(); + PlainObjectType::Map(const_ptr).sum(); + PlainObjectType::MapAligned(const_ptr).sum(); + + PlainObjectType::Map(ptr, InnerStride<>(i)).setZero(); + PlainObjectType::MapAligned(ptr, InnerStride<>(i)).setZero(); + PlainObjectType::Map(const_ptr, InnerStride<>(i)).sum(); + PlainObjectType::MapAligned(const_ptr, InnerStride<>(i)).sum(); + + PlainObjectType::Map(ptr, InnerStride<2>()).setZero(); + PlainObjectType::MapAligned(ptr, InnerStride<3>()).setZero(); + PlainObjectType::Map(const_ptr, InnerStride<4>()).sum(); + PlainObjectType::MapAligned(const_ptr, InnerStride<5>()).sum(); + + PlainObjectType::Map(ptr, OuterStride<>(i)).setZero(); + PlainObjectType::MapAligned(ptr, OuterStride<>(i)).setZero(); + PlainObjectType::Map(const_ptr, OuterStride<>(i)).sum(); + PlainObjectType::MapAligned(const_ptr, OuterStride<>(i)).sum(); + + PlainObjectType::Map(ptr, OuterStride<2>()).setZero(); + PlainObjectType::MapAligned(ptr, OuterStride<3>()).setZero(); + PlainObjectType::Map(const_ptr, OuterStride<4>()).sum(); + PlainObjectType::MapAligned(const_ptr, OuterStride<5>()).sum(); + + PlainObjectType::Map(ptr, Stride(i,j)).setZero(); + PlainObjectType::MapAligned(ptr, Stride<2,Dynamic>(2,i)).setZero(); + PlainObjectType::Map(const_ptr, Stride(i,3)).sum(); + PlainObjectType::MapAligned(const_ptr, Stride(i,j)).sum(); + + PlainObjectType::Map(ptr, Stride<2,3>()).setZero(); + PlainObjectType::MapAligned(ptr, Stride<3,4>()).setZero(); + PlainObjectType::Map(const_ptr, Stride<2,4>()).sum(); + PlainObjectType::MapAligned(const_ptr, Stride<5,3>()).sum(); + } +}; + +template +struct mapstaticmethods_impl +{ + static 
void run(const PlainObjectType& m) + { + typedef typename PlainObjectType::Index Index; + Index rows = m.rows(), cols = m.cols(); + + int i = internal::random(2,5), j = internal::random(2,5); + + PlainObjectType::Map(ptr, rows, cols).setZero(); + PlainObjectType::MapAligned(ptr, rows, cols).setZero(); + PlainObjectType::Map(const_ptr, rows, cols).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols).sum(); + + PlainObjectType::Map(ptr, rows, cols, InnerStride<>(i)).setZero(); + PlainObjectType::MapAligned(ptr, rows, cols, InnerStride<>(i)).setZero(); + PlainObjectType::Map(const_ptr, rows, cols, InnerStride<>(i)).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols, InnerStride<>(i)).sum(); + + PlainObjectType::Map(ptr, rows, cols, InnerStride<2>()).setZero(); + PlainObjectType::MapAligned(ptr, rows, cols, InnerStride<3>()).setZero(); + PlainObjectType::Map(const_ptr, rows, cols, InnerStride<4>()).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols, InnerStride<5>()).sum(); + + PlainObjectType::Map(ptr, rows, cols, OuterStride<>(i)).setZero(); + PlainObjectType::MapAligned(ptr, rows, cols, OuterStride<>(i)).setZero(); + PlainObjectType::Map(const_ptr, rows, cols, OuterStride<>(i)).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols, OuterStride<>(i)).sum(); + + PlainObjectType::Map(ptr, rows, cols, OuterStride<2>()).setZero(); + PlainObjectType::MapAligned(ptr, rows, cols, OuterStride<3>()).setZero(); + PlainObjectType::Map(const_ptr, rows, cols, OuterStride<4>()).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols, OuterStride<5>()).sum(); + + PlainObjectType::Map(ptr, rows, cols, Stride(i,j)).setZero(); + PlainObjectType::MapAligned(ptr, rows, cols, Stride<2,Dynamic>(2,i)).setZero(); + PlainObjectType::Map(const_ptr, rows, cols, Stride(i,3)).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols, Stride(i,j)).sum(); + + PlainObjectType::Map(ptr, rows, cols, Stride<2,3>()).setZero(); + PlainObjectType::MapAligned(ptr, rows, 
cols, Stride<3,4>()).setZero(); + PlainObjectType::Map(const_ptr, rows, cols, Stride<2,4>()).sum(); + PlainObjectType::MapAligned(const_ptr, rows, cols, Stride<5,3>()).sum(); + } +}; + +template +struct mapstaticmethods_impl +{ + static void run(const PlainObjectType& v) + { + typedef typename PlainObjectType::Index Index; + Index size = v.size(); + + int i = internal::random(2,5); + + PlainObjectType::Map(ptr, size).setZero(); + PlainObjectType::MapAligned(ptr, size).setZero(); + PlainObjectType::Map(const_ptr, size).sum(); + PlainObjectType::MapAligned(const_ptr, size).sum(); + + PlainObjectType::Map(ptr, size, InnerStride<>(i)).setZero(); + PlainObjectType::MapAligned(ptr, size, InnerStride<>(i)).setZero(); + PlainObjectType::Map(const_ptr, size, InnerStride<>(i)).sum(); + PlainObjectType::MapAligned(const_ptr, size, InnerStride<>(i)).sum(); + + PlainObjectType::Map(ptr, size, InnerStride<2>()).setZero(); + PlainObjectType::MapAligned(ptr, size, InnerStride<3>()).setZero(); + PlainObjectType::Map(const_ptr, size, InnerStride<4>()).sum(); + PlainObjectType::MapAligned(const_ptr, size, InnerStride<5>()).sum(); + } +}; + +template +void mapstaticmethods(const PlainObjectType& m) +{ + mapstaticmethods_impl::run(m); + VERIFY(true); // just to avoid 'unused function' warning +} + +void test_mapstaticmethods() +{ + ptr = internal::aligned_new(1000); + for(int i = 0; i < 1000; i++) ptr[i] = float(i); + + const_ptr = ptr; + + CALL_SUBTEST_1(( mapstaticmethods(Matrix()) )); + CALL_SUBTEST_1(( mapstaticmethods(Vector2f()) )); + CALL_SUBTEST_2(( mapstaticmethods(Vector3f()) )); + CALL_SUBTEST_2(( mapstaticmethods(Matrix2f()) )); + CALL_SUBTEST_3(( mapstaticmethods(Matrix4f()) )); + CALL_SUBTEST_3(( mapstaticmethods(Array4f()) )); + CALL_SUBTEST_4(( mapstaticmethods(Array3f()) )); + CALL_SUBTEST_4(( mapstaticmethods(Array33f()) )); + CALL_SUBTEST_5(( mapstaticmethods(Array44f()) )); + CALL_SUBTEST_5(( mapstaticmethods(VectorXf(1)) )); + CALL_SUBTEST_5(( 
mapstaticmethods(VectorXf(8)) )); + CALL_SUBTEST_6(( mapstaticmethods(MatrixXf(1,1)) )); + CALL_SUBTEST_6(( mapstaticmethods(MatrixXf(5,7)) )); + CALL_SUBTEST_7(( mapstaticmethods(ArrayXf(1)) )); + CALL_SUBTEST_7(( mapstaticmethods(ArrayXf(5)) )); + CALL_SUBTEST_8(( mapstaticmethods(ArrayXXf(1,1)) )); + CALL_SUBTEST_8(( mapstaticmethods(ArrayXXf(8,6)) )); + + internal::aligned_delete(ptr, 1000); +} + diff --git a/thirdparty/eigen/test/mapstride.cpp b/thirdparty/eigen/test/mapstride.cpp new file mode 100644 index 000000000..4858f8fea --- /dev/null +++ b/thirdparty/eigen/test/mapstride.cpp @@ -0,0 +1,181 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void map_class_vector(const VectorType& m) +{ + typedef typename VectorType::Index Index; + typedef typename VectorType::Scalar Scalar; + + Index size = m.size(); + + VectorType v = VectorType::Random(size); + + Index arraysize = 3*size; + + Scalar* a_array = internal::aligned_new(arraysize+1); + Scalar* array = a_array; + if(Alignment!=Aligned) + array = (Scalar*)(internal::IntPtr(a_array) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + + { + Map > map(array, size); + map = v; + for(int i = 0; i < size; ++i) + { + VERIFY(array[3*i] == v[i]); + VERIFY(map[i] == v[i]); + } + } + + { + Map > map(array, size, InnerStride(2)); + map = v; + for(int i = 0; i < size; ++i) + { + VERIFY(array[2*i] == v[i]); + VERIFY(map[i] == v[i]); + } + } + + internal::aligned_delete(a_array, arraysize+1); +} + +template void map_class_matrix(const MatrixType& _m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = 
_m.rows(), cols = _m.cols(); + + MatrixType m = MatrixType::Random(rows,cols); + Scalar s1 = internal::random(); + + Index arraysize = 2*(rows+4)*(cols+4); + + Scalar* a_array1 = internal::aligned_new(arraysize+1); + Scalar* array1 = a_array1; + if(Alignment!=Aligned) + array1 = (Scalar*)(internal::IntPtr(a_array1) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + + Scalar a_array2[256]; + Scalar* array2 = a_array2; + if(Alignment!=Aligned) + array2 = (Scalar*)(internal::IntPtr(a_array2) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + else + array2 = (Scalar*)(((internal::UIntPtr(a_array2)+EIGEN_MAX_ALIGN_BYTES-1)/EIGEN_MAX_ALIGN_BYTES)*EIGEN_MAX_ALIGN_BYTES); + Index maxsize2 = a_array2 - array2 + 256; + + // test no inner stride and some dynamic outer stride + for(int k=0; k<2; ++k) + { + if(k==1 && (m.innerSize()+1)*m.outerSize() > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + + Map > map(array, rows, cols, OuterStride(m.innerSize()+1)); + map = m; + VERIFY(map.outerStride() == map.innerSize()+1); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); + } + + // test no inner stride and an outer stride of +4. This is quite important as for fixed-size matrices, + // this allows to hit the special case where it's vectorizable. + for(int k=0; k<2; ++k) + { + if(k==1 && (m.innerSize()+4)*m.outerSize() > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + + enum { + InnerSize = MatrixType::InnerSizeAtCompileTime, + OuterStrideAtCompileTime = InnerSize==Dynamic ? 
Dynamic : InnerSize+4 + }; + Map > + map(array, rows, cols, OuterStride(m.innerSize()+4)); + map = m; + VERIFY(map.outerStride() == map.innerSize()+4); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); + } + + // test both inner stride and outer stride + for(int k=0; k<2; ++k) + { + if(k==1 && (2*m.innerSize()+1)*(m.outerSize()*2) > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + + Map > map(array, rows, cols, Stride(2*m.innerSize()+1, 2)); + map = m; + VERIFY(map.outerStride() == 2*map.innerSize()+1); + VERIFY(map.innerStride() == 2); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.outerStride()*i+map.innerStride()*j] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); + } + + internal::aligned_delete(a_array1, arraysize+1); +} + +void test_mapstride() +{ + for(int i = 0; i < g_repeat; i++) { + int maxn = 30; + CALL_SUBTEST_1( map_class_vector(Matrix()) ); + CALL_SUBTEST_1( map_class_vector(Matrix()) ); + CALL_SUBTEST_2( map_class_vector(Vector4d()) ); + CALL_SUBTEST_2( map_class_vector(Vector4d()) ); + CALL_SUBTEST_3( map_class_vector(RowVector4f()) ); + CALL_SUBTEST_3( map_class_vector(RowVector4f()) ); + CALL_SUBTEST_4( map_class_vector(VectorXcf(internal::random(1,maxn))) ); + CALL_SUBTEST_4( map_class_vector(VectorXcf(internal::random(1,maxn))) ); + CALL_SUBTEST_5( map_class_vector(VectorXi(internal::random(1,maxn))) ); + CALL_SUBTEST_5( map_class_vector(VectorXi(internal::random(1,maxn))) ); + + CALL_SUBTEST_1( map_class_matrix(Matrix()) ); + CALL_SUBTEST_1( map_class_matrix(Matrix()) ); + CALL_SUBTEST_2( 
map_class_matrix(Matrix4d()) ); + CALL_SUBTEST_2( map_class_matrix(Matrix4d()) ); + CALL_SUBTEST_3( map_class_matrix(Matrix()) ); + CALL_SUBTEST_3( map_class_matrix(Matrix()) ); + CALL_SUBTEST_3( map_class_matrix(Matrix()) ); + CALL_SUBTEST_3( map_class_matrix(Matrix()) ); + CALL_SUBTEST_4( map_class_matrix(MatrixXcf(internal::random(1,maxn),internal::random(1,maxn))) ); + CALL_SUBTEST_4( map_class_matrix(MatrixXcf(internal::random(1,maxn),internal::random(1,maxn))) ); + CALL_SUBTEST_5( map_class_matrix(MatrixXi(internal::random(1,maxn),internal::random(1,maxn))) ); + CALL_SUBTEST_5( map_class_matrix(MatrixXi(internal::random(1,maxn),internal::random(1,maxn))) ); + CALL_SUBTEST_6( map_class_matrix(MatrixXcd(internal::random(1,maxn),internal::random(1,maxn))) ); + CALL_SUBTEST_6( map_class_matrix(MatrixXcd(internal::random(1,maxn),internal::random(1,maxn))) ); + + TEST_SET_BUT_UNUSED_VARIABLE(maxn); + } +} diff --git a/thirdparty/eigen/test/meta.cpp b/thirdparty/eigen/test/meta.cpp new file mode 100644 index 000000000..b8dea68e8 --- /dev/null +++ b/thirdparty/eigen/test/meta.cpp @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template +bool check_is_convertible(const From&, const To&) +{ + return internal::is_convertible::value; +} + +void test_meta() +{ + VERIFY((internal::conditional<(3<4),internal::true_type, internal::false_type>::type::value)); + VERIFY(( internal::is_same::value)); + VERIFY((!internal::is_same::value)); + VERIFY((!internal::is_same::value)); + VERIFY((!internal::is_same::value)); + + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + + // test add_const + VERIFY(( internal::is_same< internal::add_const::type, const float >::value)); + VERIFY(( internal::is_same< internal::add_const::type, float* const>::value)); + VERIFY(( internal::is_same< internal::add_const::type, float const* const>::value)); + VERIFY(( internal::is_same< internal::add_const::type, float& >::value)); + + // test remove_const + VERIFY(( internal::is_same< internal::remove_const::type, float const* >::value)); + VERIFY(( internal::is_same< internal::remove_const::type, float const* >::value)); + VERIFY(( internal::is_same< internal::remove_const::type, float* >::value)); + + // test add_const_on_value_type + VERIFY(( internal::is_same< internal::add_const_on_value_type::type, float const& >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type::type, float const* >::value)); + + VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float >::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float >::value)); + + VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float* const>::value)); + VERIFY(( internal::is_same< internal::add_const_on_value_type::type, const float* const>::value)); + + VERIFY(( 
internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_same::type >::value)); + + VERIFY(( internal::is_convertible::value )); + VERIFY(( internal::is_convertible::value )); + VERIFY(( internal::is_convertible::value )); + VERIFY((!internal::is_convertible,double>::value )); + VERIFY(( internal::is_convertible::value )); +// VERIFY((!internal::is_convertible::value )); //does not work because the conversion is prevented by a static assertion + VERIFY((!internal::is_convertible::value )); + VERIFY((!internal::is_convertible::value )); + { + float f; + MatrixXf A, B; + VectorXf a, b; + VERIFY(( check_is_convertible(a.dot(b), f) )); + VERIFY(( check_is_convertible(a.transpose()*b, f) )); + VERIFY((!check_is_convertible(A*B, f) )); + VERIFY(( check_is_convertible(A*B, A) )); + } + + VERIFY(internal::meta_sqrt<1>::ret == 1); + #define VERIFY_META_SQRT(X) VERIFY(internal::meta_sqrt::ret == int(std::sqrt(double(X)))) + VERIFY_META_SQRT(2); + VERIFY_META_SQRT(3); + VERIFY_META_SQRT(4); + VERIFY_META_SQRT(5); + VERIFY_META_SQRT(6); + VERIFY_META_SQRT(8); + VERIFY_META_SQRT(9); + VERIFY_META_SQRT(15); + VERIFY_META_SQRT(16); + VERIFY_META_SQRT(17); + VERIFY_META_SQRT(255); + VERIFY_META_SQRT(256); + VERIFY_META_SQRT(257); + VERIFY_META_SQRT(1023); + VERIFY_META_SQRT(1024); + VERIFY_META_SQRT(1025); +} diff --git a/thirdparty/eigen/test/metis_support.cpp b/thirdparty/eigen/test/metis_support.cpp new file mode 100644 index 000000000..d87c56a13 --- /dev/null +++ b/thirdparty/eigen/test/metis_support.cpp @@ -0,0 +1,25 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" +#include +#include +#include + +template void test_metis_T() +{ + SparseLU, MetisOrdering > sparselu_metis; + + check_sparse_square_solving(sparselu_metis); +} + +void test_metis_support() +{ + CALL_SUBTEST_1(test_metis_T()); +} diff --git a/thirdparty/eigen/test/miscmatrices.cpp b/thirdparty/eigen/test/miscmatrices.cpp new file mode 100644 index 000000000..ef20dc749 --- /dev/null +++ b/thirdparty/eigen/test/miscmatrices.cpp @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void miscMatrices(const MatrixType& m) +{ + /* this test covers the following files: + DiagonalMatrix.h Ones.h + */ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + Index rows = m.rows(); + Index cols = m.cols(); + + Index r = internal::random(0, rows-1), r2 = internal::random(0, rows-1), c = internal::random(0, cols-1); + VERIFY_IS_APPROX(MatrixType::Ones(rows,cols)(r,c), static_cast(1)); + MatrixType m1 = MatrixType::Ones(rows,cols); + VERIFY_IS_APPROX(m1(r,c), static_cast(1)); + VectorType v1 = VectorType::Random(rows); + v1[0]; + Matrix + square(v1.asDiagonal()); + if(r==r2) VERIFY_IS_APPROX(square(r,r2), v1[r]); + else VERIFY_IS_MUCH_SMALLER_THAN(square(r,r2), static_cast(1)); + square = MatrixType::Zero(rows, rows); + square.diagonal() = VectorType::Ones(rows); + VERIFY_IS_APPROX(square, MatrixType::Identity(rows, rows)); +} + +void test_miscmatrices() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( miscMatrices(Matrix()) ); + 
CALL_SUBTEST_2( miscMatrices(Matrix4d()) ); + CALL_SUBTEST_3( miscMatrices(MatrixXcf(3, 3)) ); + CALL_SUBTEST_4( miscMatrices(MatrixXi(8, 12)) ); + CALL_SUBTEST_5( miscMatrices(MatrixXcd(20, 20)) ); + } +} diff --git a/thirdparty/eigen/test/mixingtypes.cpp b/thirdparty/eigen/test/mixingtypes.cpp new file mode 100644 index 000000000..ad9c2c652 --- /dev/null +++ b/thirdparty/eigen/test/mixingtypes.cpp @@ -0,0 +1,300 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2015 Gael Guennebaud +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// work around "uninitialized" warnings and give that option some testing +#define EIGEN_INITIALIZE_MATRICES_BY_ZERO + +#ifndef EIGEN_NO_STATIC_ASSERT +#define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them +#endif + +#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_2) || defined(EIGEN_TEST_PART_3) + +#ifndef EIGEN_DONT_VECTORIZE +#define EIGEN_DONT_VECTORIZE +#endif + +#endif + +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + +#include "main.h" + +using namespace std; + +#define VERIFY_MIX_SCALAR(XPR,REF) \ + g_called = false; \ + VERIFY_IS_APPROX(XPR,REF); \ + VERIFY( g_called && #XPR" not properly optimized"); + +template void mixingtypes(int size = SizeAtCompileType) +{ + typedef std::complex CF; + typedef std::complex CD; + typedef Matrix Mat_f; + typedef Matrix Mat_d; + typedef Matrix, SizeAtCompileType, SizeAtCompileType> Mat_cf; + typedef Matrix, SizeAtCompileType, SizeAtCompileType> Mat_cd; + typedef Matrix Vec_f; + typedef Matrix Vec_d; + typedef Matrix, SizeAtCompileType, 1> Vec_cf; + typedef Matrix, SizeAtCompileType, 1> Vec_cd; + + Mat_f mf = 
Mat_f::Random(size,size); + Mat_d md = mf.template cast(); + //Mat_d rd = md; + Mat_cf mcf = Mat_cf::Random(size,size); + Mat_cd mcd = mcf.template cast >(); + Mat_cd rcd = mcd; + Vec_f vf = Vec_f::Random(size,1); + Vec_d vd = vf.template cast(); + Vec_cf vcf = Vec_cf::Random(size,1); + Vec_cd vcd = vcf.template cast >(); + float sf = internal::random(); + double sd = internal::random(); + complex scf = internal::random >(); + complex scd = internal::random >(); + + mf+mf; + + float epsf = std::sqrt(std::numeric_limits ::min EIGEN_EMPTY ()); + double epsd = std::sqrt(std::numeric_limits::min EIGEN_EMPTY ()); + + while(std::abs(sf )(); + while(std::abs(sd )(); + while(std::abs(scf)(); + while(std::abs(scd)(); + +// VERIFY_RAISES_ASSERT(mf+md); // does not even compile + +#ifdef EIGEN_DONT_VECTORIZE + VERIFY_RAISES_ASSERT(vf=vd); + VERIFY_RAISES_ASSERT(vf+=vd); +#endif + + // check scalar products + VERIFY_MIX_SCALAR(vcf * sf , vcf * complex(sf)); + VERIFY_MIX_SCALAR(sd * vcd , complex(sd) * vcd); + VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); + VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); + + VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex(2)); + VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex(2.1)); + VERIFY_MIX_SCALAR(2 * vcf, vcf * complex(2)); + VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex(2.1)); + + // check scalar quotients + VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); + VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast >().array() / scf); + VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast >().array()); + + // check scalar increment + VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_MIX_SCALAR(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast >().array()); + + // check scalar subtractions + 
VERIFY_MIX_SCALAR(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_MIX_SCALAR(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); + + // check scalar powers + VERIFY_MIX_SCALAR( pow(vcf.array(), sf), Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( vcf.array().pow(sf) , Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( pow(sd, vcd.array()), Eigen::pow(complex(sd), vcd.array()) ); + VERIFY_MIX_SCALAR( Eigen::pow(vf.array(), scf), Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( vf.array().pow(scf) , Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( Eigen::pow(scd, vd.array()), Eigen::pow(scd, vd.template cast >().array()) ); + + // check dot product + vf.dot(vf); +#if 0 // we get other compilation errors here than just static asserts + VERIFY_RAISES_ASSERT(vd.dot(vf)); +#endif + VERIFY_IS_APPROX(vcf.dot(vf), vcf.dot(vf.template cast >())); + + // check diagonal product + VERIFY_IS_APPROX(vf.asDiagonal() * mcf, vf.template cast >().asDiagonal() * mcf); + VERIFY_IS_APPROX(vcd.asDiagonal() * md, vcd.asDiagonal() * md.template cast >()); + VERIFY_IS_APPROX(mcf * vf.asDiagonal(), mcf * vf.template cast >().asDiagonal()); + VERIFY_IS_APPROX(md * vcd.asDiagonal(), md.template cast >() * vcd.asDiagonal()); + +// vd.asDiagonal() * mf; // does not even compile +// vcd.asDiagonal() * mf; // does not even compile + + // check inner product + VERIFY_IS_APPROX((vf.transpose() * vcf).value(), (vf.template cast >().transpose() * vcf).value()); + + // check outer product + VERIFY_IS_APPROX((vf * vcf.transpose()).eval(), (vf.template cast >() * vcf.transpose()).eval()); + + // coeff wise product + + VERIFY_IS_APPROX((vf * vcf.transpose()).eval(), (vf.template cast >() * vcf.transpose()).eval()); + + Mat_cd mcd2 = mcd; + VERIFY_IS_APPROX(mcd.array() *= md.array(), 
mcd2.array() *= md.array().template cast >()); + + // check matrix-matrix products + VERIFY_IS_APPROX(sd*md*mcd, (sd*md).template cast().eval()*mcd); + VERIFY_IS_APPROX(sd*mcd*md, sd*mcd*md.template cast()); + VERIFY_IS_APPROX(scd*md*mcd, scd*md.template cast().eval()*mcd); + VERIFY_IS_APPROX(scd*mcd*md, scd*mcd*md.template cast()); + + VERIFY_IS_APPROX(sf*mf*mcf, sf*mf.template cast()*mcf); + VERIFY_IS_APPROX(sf*mcf*mf, sf*mcf*mf.template cast()); + VERIFY_IS_APPROX(scf*mf*mcf, scf*mf.template cast()*mcf); + VERIFY_IS_APPROX(scf*mcf*mf, scf*mcf*mf.template cast()); + + VERIFY_IS_APPROX(sd*md.adjoint()*mcd, (sd*md).template cast().eval().adjoint()*mcd); + VERIFY_IS_APPROX(sd*mcd.adjoint()*md, sd*mcd.adjoint()*md.template cast()); + VERIFY_IS_APPROX(sd*md.adjoint()*mcd.adjoint(), (sd*md).template cast().eval().adjoint()*mcd.adjoint()); + VERIFY_IS_APPROX(sd*mcd.adjoint()*md.adjoint(), sd*mcd.adjoint()*md.template cast().adjoint()); + VERIFY_IS_APPROX(sd*md*mcd.adjoint(), (sd*md).template cast().eval()*mcd.adjoint()); + VERIFY_IS_APPROX(sd*mcd*md.adjoint(), sd*mcd*md.template cast().adjoint()); + + VERIFY_IS_APPROX(sf*mf.adjoint()*mcf, (sf*mf).template cast().eval().adjoint()*mcf); + VERIFY_IS_APPROX(sf*mcf.adjoint()*mf, sf*mcf.adjoint()*mf.template cast()); + VERIFY_IS_APPROX(sf*mf.adjoint()*mcf.adjoint(), (sf*mf).template cast().eval().adjoint()*mcf.adjoint()); + VERIFY_IS_APPROX(sf*mcf.adjoint()*mf.adjoint(), sf*mcf.adjoint()*mf.template cast().adjoint()); + VERIFY_IS_APPROX(sf*mf*mcf.adjoint(), (sf*mf).template cast().eval()*mcf.adjoint()); + VERIFY_IS_APPROX(sf*mcf*mf.adjoint(), sf*mcf*mf.template cast().adjoint()); + + VERIFY_IS_APPROX(sf*mf*vcf, (sf*mf).template cast().eval()*vcf); + VERIFY_IS_APPROX(scf*mf*vcf,(scf*mf.template cast()).eval()*vcf); + VERIFY_IS_APPROX(sf*mcf*vf, sf*mcf*vf.template cast()); + VERIFY_IS_APPROX(scf*mcf*vf,scf*mcf*vf.template cast()); + + VERIFY_IS_APPROX(sf*vcf.adjoint()*mf, sf*vcf.adjoint()*mf.template cast().eval()); + 
VERIFY_IS_APPROX(scf*vcf.adjoint()*mf, scf*vcf.adjoint()*mf.template cast().eval()); + VERIFY_IS_APPROX(sf*vf.adjoint()*mcf, sf*vf.adjoint().template cast().eval()*mcf); + VERIFY_IS_APPROX(scf*vf.adjoint()*mcf, scf*vf.adjoint().template cast().eval()*mcf); + + VERIFY_IS_APPROX(sd*md*vcd, (sd*md).template cast().eval()*vcd); + VERIFY_IS_APPROX(scd*md*vcd,(scd*md.template cast()).eval()*vcd); + VERIFY_IS_APPROX(sd*mcd*vd, sd*mcd*vd.template cast().eval()); + VERIFY_IS_APPROX(scd*mcd*vd,scd*mcd*vd.template cast().eval()); + + VERIFY_IS_APPROX(sd*vcd.adjoint()*md, sd*vcd.adjoint()*md.template cast().eval()); + VERIFY_IS_APPROX(scd*vcd.adjoint()*md, scd*vcd.adjoint()*md.template cast().eval()); + VERIFY_IS_APPROX(sd*vd.adjoint()*mcd, sd*vd.adjoint().template cast().eval()*mcd); + VERIFY_IS_APPROX(scd*vd.adjoint()*mcd, scd*vd.adjoint().template cast().eval()*mcd); + + VERIFY_IS_APPROX( sd*vcd.adjoint()*md.template triangularView(), sd*vcd.adjoint()*md.template cast().eval().template triangularView()); + VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template triangularView(), scd*vcd.adjoint()*md.template cast().eval().template triangularView()); + VERIFY_IS_APPROX( sd*vcd.adjoint()*md.transpose().template triangularView(), sd*vcd.adjoint()*md.transpose().template cast().eval().template triangularView()); + VERIFY_IS_APPROX(scd*vcd.adjoint()*md.transpose().template triangularView(), scd*vcd.adjoint()*md.transpose().template cast().eval().template triangularView()); + VERIFY_IS_APPROX( sd*vd.adjoint()*mcd.template triangularView(), sd*vd.adjoint().template cast().eval()*mcd.template triangularView()); + VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template triangularView(), scd*vd.adjoint().template cast().eval()*mcd.template triangularView()); + VERIFY_IS_APPROX( sd*vd.adjoint()*mcd.transpose().template triangularView(), sd*vd.adjoint().template cast().eval()*mcd.transpose().template triangularView()); + VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.transpose().template triangularView(), 
scd*vd.adjoint().template cast().eval()*mcd.transpose().template triangularView()); + + // Not supported yet: trmm +// VERIFY_IS_APPROX(sd*mcd*md.template triangularView(), sd*mcd*md.template cast().eval().template triangularView()); +// VERIFY_IS_APPROX(scd*mcd*md.template triangularView(), scd*mcd*md.template cast().eval().template triangularView()); +// VERIFY_IS_APPROX(sd*md*mcd.template triangularView(), sd*md.template cast().eval()*mcd.template triangularView()); +// VERIFY_IS_APPROX(scd*md*mcd.template triangularView(), scd*md.template cast().eval()*mcd.template triangularView()); + + // Not supported yet: symv +// VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView(), sd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView(), scd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView(), sd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); +// VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView(), scd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); + + // Not supported yet: symm +// VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView(), sd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView(), scd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView(), sd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); +// VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView(), scd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); + + rcd.setZero(); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = sd * mcd * md), + Mat_cd((sd * mcd * md.template cast().eval()).template triangularView())); + 
VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = sd * md * mcd), + Mat_cd((sd * md.template cast().eval() * mcd).template triangularView())); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = scd * mcd * md), + Mat_cd((scd * mcd * md.template cast().eval()).template triangularView())); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = scd * md * mcd), + Mat_cd((scd * md.template cast().eval() * mcd).template triangularView())); + + + VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); + VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast().eval().array() + mcd.array() ); + VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); + VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); + + if(mcd.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); + } + if(md.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + } + + if(md.array().abs().minCoeff()>epsd || mcd.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + + VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( pow(mcd.array(),md.array()), mcd.array().pow(md.template cast().eval().array()) ); + } + + rcd = mcd; + VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd += md, mcd + md.template cast().eval() ); + rcd = 
mcd; + VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast().eval().array() ); + rcd = mcd; + if(md.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); + } + + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + + VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast()) ); + + VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= mcd + md*md, - ((md*md).eval().template cast()) ); +} + +void test_mixingtypes() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(mixingtypes<3>()); + CALL_SUBTEST_2(mixingtypes<4>()); + CALL_SUBTEST_3(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + + CALL_SUBTEST_4(mixingtypes<3>()); + CALL_SUBTEST_5(mixingtypes<4>()); + CALL_SUBTEST_6(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + } +} diff --git a/thirdparty/eigen/test/mpl2only.cpp b/thirdparty/eigen/test/mpl2only.cpp new file mode 100644 index 000000000..5ef0d2b2e --- /dev/null +++ b/thirdparty/eigen/test/mpl2only.cpp @@ -0,0 +1,20 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_MPL2_ONLY +#include +#include +#include +#include +#include + +int main() +{ + return 0; +} diff --git a/thirdparty/eigen/test/nesting_ops.cpp b/thirdparty/eigen/test/nesting_ops.cpp new file mode 100644 index 000000000..a419b0e44 --- /dev/null +++ b/thirdparty/eigen/test/nesting_ops.cpp @@ -0,0 +1,107 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define TEST_ENABLE_TEMPORARY_TRACKING + +#include "main.h" + +template +void use_n_times(const XprType &xpr) +{ + typename internal::nested_eval::type mat(xpr); + typename XprType::PlainObject res(mat.rows(), mat.cols()); + nb_temporaries--; // remove res + res.setZero(); + for(int i=0; i +bool verify_eval_type(const XprType &, const ReferenceType&) +{ + typedef typename internal::nested_eval::type EvalType; + return internal::is_same::type, typename internal::remove_all::type>::value; +} + +template void run_nesting_ops_1(const MatrixType& _m) +{ + typename internal::nested_eval::type m(_m); + + // Make really sure that we are in debug mode! + VERIFY_RAISES_ASSERT(eigen_assert(false)); + + // The only intention of these tests is to ensure that this code does + // not trigger any asserts or segmentation faults... more to come. 
+ VERIFY_IS_APPROX( (m.transpose() * m).diagonal().sum(), (m.transpose() * m).diagonal().sum() ); + VERIFY_IS_APPROX( (m.transpose() * m).diagonal().array().abs().sum(), (m.transpose() * m).diagonal().array().abs().sum() ); + + VERIFY_IS_APPROX( (m.transpose() * m).array().abs().sum(), (m.transpose() * m).array().abs().sum() ); +} + +template void run_nesting_ops_2(const MatrixType& _m) +{ + typedef typename MatrixType::Scalar Scalar; + Index rows = _m.rows(); + Index cols = _m.cols(); + MatrixType m1 = MatrixType::Random(rows,cols); + Matrix m2; + + if((MatrixType::SizeAtCompileTime==Dynamic)) + { + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.template triangularView().solve(m1.col(0))), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.template triangularView().solve(m1.col(0))), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(Scalar(2)*m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by applying the scaling in-place on the solve result + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by adding m1.col() inplace + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); + } + + { + VERIFY( verify_eval_type<10>(m1, m1) ); + if(!NumTraits::IsComplex) + { + VERIFY( verify_eval_type<3>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<4>(2*m1, m1) ); + } + else + { + VERIFY( verify_eval_type<2>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<3>(2*m1, m1) ); + } + VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); + VERIFY( verify_eval_type<3>(m1+m1, m1) ); + VERIFY( verify_eval_type<1>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m2) ); + VERIFY( verify_eval_type<2>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1+m1*m1, m1) ); + + VERIFY( verify_eval_type<1>(m1.template 
triangularView().solve(m1), m1) ); + VERIFY( verify_eval_type<1>(m1+m1.template triangularView().solve(m1), m1) ); + } +} + + +void test_nesting_ops() +{ + CALL_SUBTEST_1(run_nesting_ops_1(MatrixXf::Random(25,25))); + CALL_SUBTEST_2(run_nesting_ops_1(MatrixXcd::Random(25,25))); + CALL_SUBTEST_3(run_nesting_ops_1(Matrix4f::Random())); + CALL_SUBTEST_4(run_nesting_ops_1(Matrix2d::Random())); + + Index s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( run_nesting_ops_2(MatrixXf(s,s)) ); + CALL_SUBTEST_2( run_nesting_ops_2(MatrixXcd(s,s)) ); + CALL_SUBTEST_3( run_nesting_ops_2(Matrix4f()) ); + CALL_SUBTEST_4( run_nesting_ops_2(Matrix2d()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) +} diff --git a/thirdparty/eigen/test/nomalloc.cpp b/thirdparty/eigen/test/nomalloc.cpp new file mode 100644 index 000000000..50756c2fb --- /dev/null +++ b/thirdparty/eigen/test/nomalloc.cpp @@ -0,0 +1,229 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +// discard stack allocation as that too bypasses malloc +#define EIGEN_STACK_ALLOCATION_LIMIT 0 +// heap allocation will raise an assert if enabled at runtime +#define EIGEN_RUNTIME_NO_MALLOC + +#include "main.h" +#include +#include +#include +#include +#include + +template void nomalloc(const MatrixType& m) +{ + /* this test check no dynamic memory allocation are issued with fixed-size matrices + */ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols); + + Scalar s1 = internal::random(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2); + VERIFY_IS_APPROX((m1+m2)(r,c), (m1(r,c))+(m2(r,c))); + VERIFY_IS_APPROX(m1.cwiseProduct(m1.block(0,0,rows,cols)), (m1.array()*m1.array()).matrix()); + VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2)); + + m2.col(0).noalias() = m1 * m1.col(0); + m2.col(0).noalias() -= m1.adjoint() * m1.col(0); + m2.col(0).noalias() -= m1 * m1.row(0).adjoint(); + m2.col(0).noalias() -= m1.adjoint() * m1.row(0).adjoint(); + + m2.row(0).noalias() = m1.row(0) * m1; + m2.row(0).noalias() -= m1.row(0) * m1.adjoint(); + m2.row(0).noalias() -= m1.col(0).adjoint() * m1; + m2.row(0).noalias() -= m1.col(0).adjoint() * m1.adjoint(); + VERIFY_IS_APPROX(m2,m2); + + m2.col(0).noalias() = m1.template triangularView() * m1.col(0); + m2.col(0).noalias() -= m1.adjoint().template triangularView() * m1.col(0); + m2.col(0).noalias() -= m1.template triangularView() * m1.row(0).adjoint(); + m2.col(0).noalias() -= m1.adjoint().template triangularView() * m1.row(0).adjoint(); + + m2.row(0).noalias() = m1.row(0) * m1.template triangularView(); + m2.row(0).noalias() -= m1.row(0) * m1.adjoint().template triangularView(); + m2.row(0).noalias() -= m1.col(0).adjoint() * m1.template 
triangularView(); + m2.row(0).noalias() -= m1.col(0).adjoint() * m1.adjoint().template triangularView(); + VERIFY_IS_APPROX(m2,m2); + + m2.col(0).noalias() = m1.template selfadjointView() * m1.col(0); + m2.col(0).noalias() -= m1.adjoint().template selfadjointView() * m1.col(0); + m2.col(0).noalias() -= m1.template selfadjointView() * m1.row(0).adjoint(); + m2.col(0).noalias() -= m1.adjoint().template selfadjointView() * m1.row(0).adjoint(); + + m2.row(0).noalias() = m1.row(0) * m1.template selfadjointView(); + m2.row(0).noalias() -= m1.row(0) * m1.adjoint().template selfadjointView(); + m2.row(0).noalias() -= m1.col(0).adjoint() * m1.template selfadjointView(); + m2.row(0).noalias() -= m1.col(0).adjoint() * m1.adjoint().template selfadjointView(); + VERIFY_IS_APPROX(m2,m2); + + m2.template selfadjointView().rankUpdate(m1.col(0),-1); + m2.template selfadjointView().rankUpdate(m1.row(0),-1); + m2.template selfadjointView().rankUpdate(m1.col(0), m1.col(0)); // rank-2 + + // The following fancy matrix-matrix products are not safe yet regarding static allocation + m2.template selfadjointView().rankUpdate(m1); + m2 += m2.template triangularView() * m1; + m2.template triangularView() = m2 * m2; + m1 += m1.template selfadjointView() * m2; + VERIFY_IS_APPROX(m2,m2); +} + +template +void ctms_decompositions() +{ + const int maxSize = 16; + const int size = 12; + + typedef Eigen::Matrix Matrix; + + typedef Eigen::Matrix Vector; + + typedef Eigen::Matrix, + Eigen::Dynamic, Eigen::Dynamic, + 0, + maxSize, maxSize> ComplexMatrix; + + const Matrix A(Matrix::Random(size, size)), B(Matrix::Random(size, size)); + Matrix X(size,size); + const ComplexMatrix complexA(ComplexMatrix::Random(size, size)); + const Matrix saA = A.adjoint() * A; + const Vector b(Vector::Random(size)); + Vector x(size); + + // Cholesky module + Eigen::LLT LLT; LLT.compute(A); + X = LLT.solve(B); + x = LLT.solve(b); + Eigen::LDLT LDLT; LDLT.compute(A); + X = LDLT.solve(B); + x = LDLT.solve(b); + + // 
Eigenvalues module + Eigen::HessenbergDecomposition hessDecomp; hessDecomp.compute(complexA); + Eigen::ComplexSchur cSchur(size); cSchur.compute(complexA); + Eigen::ComplexEigenSolver cEigSolver; cEigSolver.compute(complexA); + Eigen::EigenSolver eigSolver; eigSolver.compute(A); + Eigen::SelfAdjointEigenSolver saEigSolver(size); saEigSolver.compute(saA); + Eigen::Tridiagonalization tridiag; tridiag.compute(saA); + + // LU module + Eigen::PartialPivLU ppLU; ppLU.compute(A); + X = ppLU.solve(B); + x = ppLU.solve(b); + Eigen::FullPivLU fpLU; fpLU.compute(A); + X = fpLU.solve(B); + x = fpLU.solve(b); + + // QR module + Eigen::HouseholderQR hQR; hQR.compute(A); + X = hQR.solve(B); + x = hQR.solve(b); + Eigen::ColPivHouseholderQR cpQR; cpQR.compute(A); + X = cpQR.solve(B); + x = cpQR.solve(b); + Eigen::FullPivHouseholderQR fpQR; fpQR.compute(A); + // FIXME X = fpQR.solve(B); + x = fpQR.solve(b); + + // SVD module + Eigen::JacobiSVD jSVD; jSVD.compute(A, ComputeFullU | ComputeFullV); +} + +void test_zerosized() { + // default constructors: + Eigen::MatrixXd A; + Eigen::VectorXd v; + // explicit zero-sized: + Eigen::ArrayXXd A0(0,0); + Eigen::ArrayXd v0(0); + + // assigning empty objects to each other: + A=A0; + v=v0; +} + +template void test_reference(const MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + enum { Flag = MatrixType::IsRowMajor ? Eigen::RowMajor : Eigen::ColMajor}; + enum { TransposeFlag = !MatrixType::IsRowMajor ? 
Eigen::RowMajor : Eigen::ColMajor}; + typename MatrixType::Index rows = m.rows(), cols=m.cols(); + typedef Eigen::Matrix MatrixX; + typedef Eigen::Matrix MatrixXT; + // Dynamic reference: + typedef Eigen::Ref Ref; + typedef Eigen::Ref RefT; + + Ref r1(m); + Ref r2(m.block(rows/3, cols/4, rows/2, cols/2)); + RefT r3(m.transpose()); + RefT r4(m.topLeftCorner(rows/2, cols/2).transpose()); + + VERIFY_RAISES_ASSERT(RefT r5(m)); + VERIFY_RAISES_ASSERT(Ref r6(m.transpose())); + VERIFY_RAISES_ASSERT(Ref r7(Scalar(2) * m)); + + // Copy constructors shall also never malloc + Ref r8 = r1; + RefT r9 = r3; + + // Initializing from a compatible Ref shall also never malloc + Eigen::Ref > r10=r8, r11=m; + + // Initializing from an incompatible Ref will malloc: + typedef Eigen::Ref RefAligned; + VERIFY_RAISES_ASSERT(RefAligned r12=r10); + VERIFY_RAISES_ASSERT(Ref r13=r10); // r10 has more dynamic strides + +} + +void test_nomalloc() +{ + // create some dynamic objects + Eigen::MatrixXd M1 = MatrixXd::Random(3,3); + Ref R1 = 2.0*M1; // Ref requires temporary + + // from here on prohibit malloc: + Eigen::internal::set_is_malloc_allowed(false); + + // check that our operator new is indeed called: + VERIFY_RAISES_ASSERT(MatrixXd dummy(MatrixXd::Random(3,3))); + CALL_SUBTEST_1(nomalloc(Matrix()) ); + CALL_SUBTEST_2(nomalloc(Matrix4d()) ); + CALL_SUBTEST_3(nomalloc(Matrix()) ); + + // Check decomposition modules with dynamic matrices that have a known compile-time max size (ctms) + CALL_SUBTEST_4(ctms_decompositions()); + + CALL_SUBTEST_5(test_zerosized()); + + CALL_SUBTEST_6(test_reference(Matrix())); + CALL_SUBTEST_7(test_reference(R1)); + CALL_SUBTEST_8(Ref R2 = M1.topRows<2>(); test_reference(R2)); +} diff --git a/thirdparty/eigen/test/nullary.cpp b/thirdparty/eigen/test/nullary.cpp new file mode 100644 index 000000000..351d26e74 --- /dev/null +++ b/thirdparty/eigen/test/nullary.cpp @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear 
algebra. +// +// Copyright (C) 2010-2011 Jitse Niesen +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template +bool equalsIdentity(const MatrixType& A) +{ + typedef typename MatrixType::Scalar Scalar; + Scalar zero = static_cast(0); + + bool offDiagOK = true; + for (Index i = 0; i < A.rows(); ++i) { + for (Index j = i+1; j < A.cols(); ++j) { + offDiagOK = offDiagOK && (A(i,j) == zero); + } + } + for (Index i = 0; i < A.rows(); ++i) { + for (Index j = 0; j < (std::min)(i, A.cols()); ++j) { + offDiagOK = offDiagOK && (A(i,j) == zero); + } + } + + bool diagOK = (A.diagonal().array() == 1).all(); + return offDiagOK && diagOK; + +} + +template +void check_extremity_accuracy(const VectorType &v, const typename VectorType::Scalar &low, const typename VectorType::Scalar &high) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; + + RealScalar prec = internal::is_same::value ? 
NumTraits::dummy_precision()*10 : NumTraits::dummy_precision()/10; + Index size = v.size(); + + if(size<20) + return; + + for (int i=0; isize-6) + { + Scalar ref = (low*RealScalar(size-i-1))/RealScalar(size-1) + (high*RealScalar(i))/RealScalar(size-1); + if(std::abs(ref)>1) + { + if(!internal::isApprox(v(i), ref, prec)) + std::cout << v(i) << " != " << ref << " ; relative error: " << std::abs((v(i)-ref)/ref) << " ; required precision: " << prec << " ; range: " << low << "," << high << " ; i: " << i << "\n"; + VERIFY(internal::isApprox(v(i), (low*RealScalar(size-i-1))/RealScalar(size-1) + (high*RealScalar(i))/RealScalar(size-1), prec)); + } + } + } +} + +template +void testVectorType(const VectorType& base) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; + + const Index size = base.size(); + + Scalar high = internal::random(-500,500); + Scalar low = (size == 1 ? high : internal::random(-500,500)); + if (low>high) std::swap(low,high); + + // check low==high + if(internal::random(0.f,1.f)<0.05f) + low = high; + // check abs(low) >> abs(high) + else if(size>2 && std::numeric_limits::max_exponent10>0 && internal::random(0.f,1.f)<0.1f) + low = -internal::random(1,2) * RealScalar(std::pow(RealScalar(10),std::numeric_limits::max_exponent10/2)); + + const Scalar step = ((size == 1) ? 
1 : (high-low)/(size-1)); + + // check whether the result yields what we expect it to do + VectorType m(base); + m.setLinSpaced(size,low,high); + + if(!NumTraits::IsInteger) + { + VectorType n(size); + for (int i=0; i::IsInteger) || ((high-low)>=size && (Index(high-low)%(size-1))==0) || (Index(high-low+1)::IsInteger) || (high-low>=size)) + for (int i=0; i::IsInteger) + CALL_SUBTEST( check_extremity_accuracy(m, low, high) ); + } + + VERIFY( m(m.size()-1) <= high ); + VERIFY( (m.array() <= high).all() ); + VERIFY( (m.array() >= low).all() ); + + + VERIFY( m(m.size()-1) >= low ); + if(size>=1) + { + VERIFY( internal::isApprox(m(0),low) ); + VERIFY_IS_EQUAL(m(0) , low); + } + + // check whether everything works with row and col major vectors + Matrix row_vector(size); + Matrix col_vector(size); + row_vector.setLinSpaced(size,low,high); + col_vector.setLinSpaced(size,low,high); + // when using the extended precision (e.g., FPU) the relative error might exceed 1 bit + // when computing the squared sum in isApprox, thus the 2x factor. 
+ VERIFY( row_vector.isApprox(col_vector.transpose(), Scalar(2)*NumTraits::epsilon())); + + Matrix size_changer(size+50); + size_changer.setLinSpaced(size,low,high); + VERIFY( size_changer.size() == size ); + + typedef Matrix ScalarMatrix; + ScalarMatrix scalar; + scalar.setLinSpaced(1,low,high); + VERIFY_IS_APPROX( scalar, ScalarMatrix::Constant(high) ); + VERIFY_IS_APPROX( ScalarMatrix::LinSpaced(1,low,high), ScalarMatrix::Constant(high) ); + + // regression test for bug 526 (linear vectorized transversal) + if (size > 1 && (!NumTraits::IsInteger)) { + m.tail(size-1).setLinSpaced(low, high); + VERIFY_IS_APPROX(m(size-1), high); + } +} + +template +void testMatrixType(const MatrixType& m) +{ + using std::abs; + const Index rows = m.rows(); + const Index cols = m.cols(); + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Scalar s1; + do { + s1 = internal::random(); + } while(abs(s1)::IsInteger)); + + MatrixType A; + A.setIdentity(rows, cols); + VERIFY(equalsIdentity(A)); + VERIFY(equalsIdentity(MatrixType::Identity(rows, cols))); + + + A = MatrixType::Constant(rows,cols,s1); + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + VERIFY_IS_APPROX( MatrixType::Constant(rows,cols,s1)(i,j), s1 ); + VERIFY_IS_APPROX( MatrixType::Constant(rows,cols,s1).coeff(i,j), s1 ); + VERIFY_IS_APPROX( A(i,j), s1 ); +} + +void test_nullary() +{ + CALL_SUBTEST_1( testMatrixType(Matrix2d()) ); + CALL_SUBTEST_2( testMatrixType(MatrixXcf(internal::random(1,300),internal::random(1,300))) ); + CALL_SUBTEST_3( testMatrixType(MatrixXf(internal::random(1,300),internal::random(1,300))) ); + + for(int i = 0; i < g_repeat*10; i++) { + CALL_SUBTEST_4( testVectorType(VectorXd(internal::random(1,30000))) ); + CALL_SUBTEST_5( testVectorType(Vector4d()) ); // regression test for bug 232 + CALL_SUBTEST_6( testVectorType(Vector3d()) ); + CALL_SUBTEST_7( testVectorType(VectorXf(internal::random(1,30000))) ); + 
CALL_SUBTEST_8( testVectorType(Vector3f()) ); + CALL_SUBTEST_8( testVectorType(Vector4f()) ); + CALL_SUBTEST_8( testVectorType(Matrix()) ); + CALL_SUBTEST_8( testVectorType(Matrix()) ); + + CALL_SUBTEST_9( testVectorType(VectorXi(internal::random(1,300))) ); + CALL_SUBTEST_9( testVectorType(Matrix()) ); + } + +#ifdef EIGEN_TEST_PART_6 + // Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79). + VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits::epsilon() ); +#endif + +#ifdef EIGEN_TEST_PART_9 + // Check possible overflow issue + { + int n = 60000; + ArrayXi a1(n), a2(n); + a1.setLinSpaced(n, 0, n-1); + for(int i=0; i >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( internal::has_binary_operator >::value )); + VERIFY(( !internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + // Regression unit test for a weird MSVC bug. + // Search "nullary_wrapper_workaround_msvc" in CoreEvaluators.h for the details. + // See also traits::match. 
+ { + MatrixXf A = MatrixXf::Random(3,3); + Ref R = 2.0*A; + VERIFY_IS_APPROX(R, A+A); + + Ref R1 = MatrixXf::Random(3,3)+A; + + VectorXi V = VectorXi::Random(3); + Ref R2 = VectorXi::LinSpaced(3,1,3)+V; + VERIFY_IS_APPROX(R2, V+Vector3i(1,2,3)); + + VERIFY(( internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + } +#endif +} diff --git a/thirdparty/eigen/test/packetmath.cpp b/thirdparty/eigen/test/packetmath.cpp new file mode 100644 index 000000000..7821a1738 --- /dev/null +++ b/thirdparty/eigen/test/packetmath.cpp @@ -0,0 +1,641 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include "unsupported/Eigen/SpecialFunctions" + +#if defined __GNUC__ && __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" +#endif +// using namespace Eigen; + +#ifdef EIGEN_VECTORIZE_SSE +const bool g_vectorize_sse = true; +#else +const bool g_vectorize_sse = false; +#endif + +namespace Eigen { +namespace internal { +template T negate(const T& x) { return -x; } +} +} + +// NOTE: we disbale inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU. 
+template EIGEN_DONT_INLINE +bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) +{ + return internal::isMuchSmallerThan(a-b, refvalue); +} + +template bool areApproxAbs(const Scalar* a, const Scalar* b, int size, const typename NumTraits::Real& refvalue) +{ + for (int i=0; i >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; + return false; + } + } + return true; +} + +template bool areApprox(const Scalar* a, const Scalar* b, int size) +{ + for (int i=0; i >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; + return false; + } + } + return true; +} + +#define CHECK_CWISE1(REFOP, POP) { \ + for (int i=0; i(data1))); \ + VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ +} + +template +struct packet_helper +{ + template + inline Packet load(const T* from) const { return internal::pload(from); } + + template + inline void store(T* to, const Packet& x) const { internal::pstore(to,x); } +}; + +template +struct packet_helper +{ + template + inline T load(const T* from) const { return *from; } + + template + inline void store(T* to, const T& x) const { *to = x; } +}; + +#define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \ + packet_helper h; \ + for (int i=0; i h; \ + for (int i=0; i void packetmath() +{ + using std::abs; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + typedef typename NumTraits::Real RealScalar; + + const int max_size = PacketSize > 4 ? 
PacketSize : 4; + const int size = PacketSize*max_size; + EIGEN_ALIGN_MAX Scalar data1[size]; + EIGEN_ALIGN_MAX Scalar data2[size]; + EIGEN_ALIGN_MAX Packet packets[PacketSize*2]; + EIGEN_ALIGN_MAX Scalar ref[size]; + RealScalar refvalue = 0; + for (int i=0; i()/RealScalar(PacketSize); + data2[i] = internal::random()/RealScalar(PacketSize); + refvalue = (std::max)(refvalue,abs(data1[i])); + } + + internal::pstore(data2, internal::pload(data1)); + VERIFY(areApprox(data1, data2, PacketSize) && "aligned load/store"); + + for (int offset=0; offset(data1+offset)); + VERIFY(areApprox(data1+offset, data2, PacketSize) && "internal::ploadu"); + } + + for (int offset=0; offset(data1)); + VERIFY(areApprox(data1, data2+offset, PacketSize) && "internal::pstoreu"); + } + + for (int offset=0; offset(data1); + packets[1] = internal::pload(data1+PacketSize); + if (offset==0) internal::palign<0>(packets[0], packets[1]); + else if (offset==1) internal::palign<1>(packets[0], packets[1]); + else if (offset==2) internal::palign<2>(packets[0], packets[1]); + else if (offset==3) internal::palign<3>(packets[0], packets[1]); + else if (offset==4) internal::palign<4>(packets[0], packets[1]); + else if (offset==5) internal::palign<5>(packets[0], packets[1]); + else if (offset==6) internal::palign<6>(packets[0], packets[1]); + else if (offset==7) internal::palign<7>(packets[0], packets[1]); + else if (offset==8) internal::palign<8>(packets[0], packets[1]); + else if (offset==9) internal::palign<9>(packets[0], packets[1]); + else if (offset==10) internal::palign<10>(packets[0], packets[1]); + else if (offset==11) internal::palign<11>(packets[0], packets[1]); + else if (offset==12) internal::palign<12>(packets[0], packets[1]); + else if (offset==13) internal::palign<13>(packets[0], packets[1]); + else if (offset==14) internal::palign<14>(packets[0], packets[1]); + else if (offset==15) internal::palign<15>(packets[0], packets[1]); + internal::pstore(data2, packets[0]); + + for (int i=0; i::value) 
|| (!PacketTraits::Vectorizable) || PacketTraits::HasDiv); + + CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd); + CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub); + CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); + CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); + + CHECK_CWISE1(internal::negate, internal::pnegate); + CHECK_CWISE1(numext::conj, internal::pconj); + + for(int offset=0;offset<3;++offset) + { + for (int i=0; i(data1[offset])); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::pset1"); + } + + { + for (int i=0; i(data1, A0, A1, A2, A3); + internal::pstore(data2+0*PacketSize, A0); + internal::pstore(data2+1*PacketSize, A1); + internal::pstore(data2+2*PacketSize, A2); + internal::pstore(data2+3*PacketSize, A3); + VERIFY(areApprox(ref, data2, 4*PacketSize) && "internal::pbroadcast4"); + } + + { + for (int i=0; i(data1, A0, A1); + internal::pstore(data2+0*PacketSize, A0); + internal::pstore(data2+1*PacketSize, A1); + VERIFY(areApprox(ref, data2, 2*PacketSize) && "internal::pbroadcast2"); + } + + VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload(data1))) && "internal::pfirst"); + + if(PacketSize>1) + { + for(int offset=0;offset<4;++offset) + { + for(int i=0;i(data1+offset)); + VERIFY(areApprox(ref, data2, PacketSize) && "ploaddup"); + } + } + + if(PacketSize>2) + { + for(int offset=0;offset<4;++offset) + { + for(int i=0;i(data1+offset)); + VERIFY(areApprox(ref, data2, PacketSize) && "ploadquad"); + } + } + + ref[0] = 0; + for (int i=0; i(data1)), refvalue) && "internal::predux"); + + { + for (int i=0; i<4; ++i) + ref[i] = 0; + for (int i=0; i(data1))); + VERIFY(areApprox(ref, data2, PacketSize>4?PacketSize/2:PacketSize) && "internal::predux_downto4"); + } + + ref[0] = 1; + for (int i=0; i(data1))) && "internal::predux_mul"); + + for (int j=0; j(data1+j*PacketSize); + } + internal::pstore(data2, internal::preduxp(packets)); + VERIFY(areApproxAbs(ref, data2, PacketSize, 
refvalue) && "internal::preduxp"); + + for (int i=0; i(data1))); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse"); + + internal::PacketBlock kernel; + for (int i=0; i(data1+i*PacketSize); + } + ptranspose(kernel); + for (int i=0; i(data1); + Packet elsePacket = internal::pload(data2); + EIGEN_ALIGN_MAX internal::Selector selector; + for (int i = 0; i < PacketSize; ++i) { + selector.select[i] = i; + } + + Packet blend = internal::pblend(selector, thenPacket, elsePacket); + EIGEN_ALIGN_MAX Scalar result[size]; + internal::pstore(result, blend); + for (int i = 0; i < PacketSize; ++i) { + VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue)); + } + } + + if (PacketTraits::HasBlend || g_vectorize_sse) { + // pinsertfirst + for (int i=0; i(); + ref[0] = s; + internal::pstore(data2, internal::pinsertfirst(internal::pload(data1),s)); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertfirst"); + } + + if (PacketTraits::HasBlend || g_vectorize_sse) { + // pinsertlast + for (int i=0; i(); + ref[PacketSize-1] = s; + internal::pstore(data2, internal::pinsertlast(internal::pload(data1),s)); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertlast"); + } +} + +template void packetmath_real() +{ + using std::abs; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + + const int size = PacketSize*4; + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; + + for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); + data2[i] = internal::random(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); + } + CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin); + CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); + CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); + + 
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); + CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); + CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); + + for (int i=0; i(-1,1); + data2[i] = internal::random(-1,1); + } + CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin); + CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos); + + for (int i=0; i(-87,88); + data2[i] = internal::random(-87,88); + } + CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp); + for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-6,6)); + data2[i] = internal::random(-1,1) * std::pow(Scalar(10), internal::random(-6,6)); + } + CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh); + if(PacketTraits::HasExp && PacketTraits::size>=2) + { + data1[0] = std::numeric_limits::quiet_NaN(); + data1[1] = std::numeric_limits::epsilon(); + packet_helper h; + h.store(data2, internal::pexp(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + VERIFY_IS_EQUAL(std::exp(std::numeric_limits::epsilon()), data2[1]); + + data1[0] = -std::numeric_limits::epsilon(); + data1[1] = 0; + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::epsilon()), data2[0]); + VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]); + + data1[0] = (std::numeric_limits::min)(); + data1[1] = -(std::numeric_limits::min)(); + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp((std::numeric_limits::min)()), data2[0]); + VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits::min)()), data2[1]); + + data1[0] = std::numeric_limits::denorm_min(); + data1[1] = -std::numeric_limits::denorm_min(); + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp(std::numeric_limits::denorm_min()), data2[0]); + VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); + } + + if (PacketTraits::HasTanh) { + // NOTE this test migh fail 
with GCC prior to 6.3, see MathFunctionsImpl.h for details. + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasTanh,Packet> h; + h.store(data2, internal::ptanh(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + +#if EIGEN_HAS_C99_MATH + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasLGamma,Packet> h; + h.store(data2, internal::plgamma(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErf,Packet> h; + h.store(data2, internal::perf(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErfc,Packet> h; + h.store(data2, internal::perfc(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } +#endif // EIGEN_HAS_C99_MATH + + for (int i=0; i(0,1) * std::pow(Scalar(10), internal::random(-6,6)); + data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); + } + + if(internal::random(0,1)<0.1f) + data1[internal::random(0, PacketSize)] = 0; + CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); + CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); +#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L) + CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p); + CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); + CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); + CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); +#endif + + if(PacketTraits::HasLog && PacketTraits::size>=2) + { + data1[0] = std::numeric_limits::quiet_NaN(); + data1[1] = std::numeric_limits::epsilon(); + packet_helper h; + h.store(data2, internal::plog(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + VERIFY_IS_EQUAL(std::log(std::numeric_limits::epsilon()), data2[1]); + + data1[0] = -std::numeric_limits::epsilon(); + data1[1] = 0; + h.store(data2, 
internal::plog(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]); + + data1[0] = (std::numeric_limits::min)(); + data1[1] = -(std::numeric_limits::min)(); + h.store(data2, internal::plog(h.load(data1))); + VERIFY_IS_EQUAL(std::log((std::numeric_limits::min)()), data2[0]); + VERIFY((numext::isnan)(data2[1])); + + data1[0] = std::numeric_limits::denorm_min(); + data1[1] = -std::numeric_limits::denorm_min(); + h.store(data2, internal::plog(h.load(data1))); + // VERIFY_IS_EQUAL(std::log(std::numeric_limits::denorm_min()), data2[0]); + VERIFY((numext::isnan)(data2[1])); + + data1[0] = Scalar(-1.0f); + h.store(data2, internal::plog(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + h.store(data2, internal::psqrt(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + VERIFY((numext::isnan)(data2[1])); + } +} + +template void packetmath_notcomplex() +{ + using std::abs; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; + + Array::Map(data1, PacketTraits::size*4).setRandom(); + + ref[0] = data1[0]; + for (int i=0; i(data1))) && "internal::predux_min"); + + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax); + + CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin); + CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax); + CHECK_CWISE1(abs, internal::pabs); + + ref[0] = data1[0]; + for (int i=0; i(data1))) && "internal::predux_max"); + + for (int i=0; i(data1[0])); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::plset"); +} + +template void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) +{ + typedef internal::packet_traits PacketTraits; 
+ typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + + internal::conj_if cj0; + internal::conj_if cj1; + internal::conj_helper cj; + internal::conj_helper pcj; + + for(int i=0;i(data1),internal::pload(data2))); + VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmul"); + + for(int i=0;i(data1),internal::pload(data2),internal::pload(pval))); + VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmadd"); +} + +template void packetmath_complex() +{ + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + + const int size = PacketSize*4; + EIGEN_ALIGN_MAX Scalar data1[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar pval[PacketSize*4]; + + for (int i=0; i() * Scalar(1e2); + data2[i] = internal::random() * Scalar(1e2); + } + + test_conj_helper (data1,data2,ref,pval); + test_conj_helper (data1,data2,ref,pval); + test_conj_helper (data1,data2,ref,pval); + test_conj_helper (data1,data2,ref,pval); + + { + for(int i=0;i(data1))); + VERIFY(areApprox(ref, pval, PacketSize) && "pcplxflip"); + } +} + +template void packetmath_scatter_gather() +{ + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + typedef typename NumTraits::Real RealScalar; + const int PacketSize = PacketTraits::size; + EIGEN_ALIGN_MAX Scalar data1[PacketSize]; + RealScalar refvalue = 0; + for (int i=0; i()/RealScalar(PacketSize); + } + + int stride = internal::random(1,20); + + EIGEN_ALIGN_MAX Scalar buffer[PacketSize*20]; + memset(buffer, 0, 20*PacketSize*sizeof(Scalar)); + Packet packet = internal::pload(data1); + internal::pscatter(buffer, packet, stride); + + for (int i = 0; i < PacketSize*20; ++i) { + if ((i%stride) == 0 && i()/RealScalar(PacketSize); + } + packet = internal::pgather(buffer, 7); + internal::pstore(data1, packet); + for (int i = 0; i < 
PacketSize; ++i) { + VERIFY(isApproxAbs(data1[i], buffer[i*7], refvalue) && "pgather"); + } +} + +void test_packetmath() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( packetmath() ); + CALL_SUBTEST_2( packetmath() ); + CALL_SUBTEST_3( packetmath() ); + CALL_SUBTEST_4( packetmath >() ); + CALL_SUBTEST_5( packetmath >() ); + + CALL_SUBTEST_1( packetmath_notcomplex() ); + CALL_SUBTEST_2( packetmath_notcomplex() ); + CALL_SUBTEST_3( packetmath_notcomplex() ); + + CALL_SUBTEST_1( packetmath_real() ); + CALL_SUBTEST_2( packetmath_real() ); + + CALL_SUBTEST_4( packetmath_complex >() ); + CALL_SUBTEST_5( packetmath_complex >() ); + + CALL_SUBTEST_1( packetmath_scatter_gather() ); + CALL_SUBTEST_2( packetmath_scatter_gather() ); + CALL_SUBTEST_3( packetmath_scatter_gather() ); + CALL_SUBTEST_4( packetmath_scatter_gather >() ); + CALL_SUBTEST_5( packetmath_scatter_gather >() ); + } +} diff --git a/thirdparty/eigen/test/pardiso_support.cpp b/thirdparty/eigen/test/pardiso_support.cpp new file mode 100644 index 000000000..67efad6d8 --- /dev/null +++ b/thirdparty/eigen/test/pardiso_support.cpp @@ -0,0 +1,29 @@ +/* + Intel Copyright (C) .... 
+*/ + +#include "sparse_solver.h" +#include + +template void test_pardiso_T() +{ + PardisoLLT < SparseMatrix, Lower> pardiso_llt_lower; + PardisoLLT < SparseMatrix, Upper> pardiso_llt_upper; + PardisoLDLT < SparseMatrix, Lower> pardiso_ldlt_lower; + PardisoLDLT < SparseMatrix, Upper> pardiso_ldlt_upper; + PardisoLU < SparseMatrix > pardiso_lu; + + check_sparse_spd_solving(pardiso_llt_lower); + check_sparse_spd_solving(pardiso_llt_upper); + check_sparse_spd_solving(pardiso_ldlt_lower); + check_sparse_spd_solving(pardiso_ldlt_upper); + check_sparse_square_solving(pardiso_lu); +} + +void test_pardiso_support() +{ + CALL_SUBTEST_1(test_pardiso_T()); + CALL_SUBTEST_2(test_pardiso_T()); + CALL_SUBTEST_3(test_pardiso_T< std::complex >()); + CALL_SUBTEST_4(test_pardiso_T< std::complex >()); +} diff --git a/thirdparty/eigen/test/pastix_support.cpp b/thirdparty/eigen/test/pastix_support.cpp new file mode 100644 index 000000000..b62f85739 --- /dev/null +++ b/thirdparty/eigen/test/pastix_support.cpp @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2012 Gael Guennebaud +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#include "sparse_solver.h" +#include +#include + + +template void test_pastix_T() +{ + PastixLLT< SparseMatrix, Eigen::Lower > pastix_llt_lower; + PastixLDLT< SparseMatrix, Eigen::Lower > pastix_ldlt_lower; + PastixLLT< SparseMatrix, Eigen::Upper > pastix_llt_upper; + PastixLDLT< SparseMatrix, Eigen::Upper > pastix_ldlt_upper; + PastixLU< SparseMatrix > pastix_lu; + + check_sparse_spd_solving(pastix_llt_lower); + check_sparse_spd_solving(pastix_ldlt_lower); + check_sparse_spd_solving(pastix_llt_upper); + check_sparse_spd_solving(pastix_ldlt_upper); + check_sparse_square_solving(pastix_lu); + + // Some compilation check: + pastix_llt_lower.iparm(); + pastix_llt_lower.dparm(); + pastix_ldlt_lower.iparm(); + pastix_ldlt_lower.dparm(); + pastix_lu.iparm(); + pastix_lu.dparm(); +} + +// There is no support for selfadjoint matrices with PaStiX. +// Complex symmetric matrices should pass though +template void test_pastix_T_LU() +{ + PastixLU< SparseMatrix > pastix_lu; + check_sparse_square_solving(pastix_lu); +} + +void test_pastix_support() +{ + CALL_SUBTEST_1(test_pastix_T()); + CALL_SUBTEST_2(test_pastix_T()); + CALL_SUBTEST_3( (test_pastix_T_LU >()) ); + CALL_SUBTEST_4(test_pastix_T_LU >()); +} diff --git a/thirdparty/eigen/test/permutationmatrices.cpp b/thirdparty/eigen/test/permutationmatrices.cpp new file mode 100644 index 000000000..41aa57d6d --- /dev/null +++ b/thirdparty/eigen/test/permutationmatrices.cpp @@ -0,0 +1,150 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define TEST_ENABLE_TEMPORARY_TRACKING + +#include "main.h" + +using namespace std; +template void permutationmatrices(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime, + Options = MatrixType::Options }; + typedef PermutationMatrix LeftPermutationType; + typedef Matrix LeftPermutationVectorType; + typedef Map MapLeftPerm; + typedef PermutationMatrix RightPermutationType; + typedef Matrix RightPermutationVectorType; + typedef Map MapRightPerm; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m_original = MatrixType::Random(rows,cols); + LeftPermutationVectorType lv; + randomPermutationVector(lv, rows); + LeftPermutationType lp(lv); + RightPermutationVectorType rv; + randomPermutationVector(rv, cols); + RightPermutationType rp(rv); + MatrixType m_permuted = MatrixType::Random(rows,cols); + + const int one_if_dynamic = MatrixType::SizeAtCompileTime==Dynamic ? 
1 : 0; + VERIFY_EVALUATION_COUNT(m_permuted = lp * m_original * rp, one_if_dynamic); // 1 temp for sub expression "lp * m_original" + + for (int i=0; i lm(lp); + Matrix rm(rp); + + VERIFY_IS_APPROX(m_permuted, lm*m_original*rm); + + m_permuted = m_original; + VERIFY_EVALUATION_COUNT(m_permuted = lp * m_permuted * rp, one_if_dynamic); + VERIFY_IS_APPROX(m_permuted, lm*m_original*rm); + + VERIFY_IS_APPROX(lp.inverse()*m_permuted*rp.inverse(), m_original); + VERIFY_IS_APPROX(lv.asPermutation().inverse()*m_permuted*rv.asPermutation().inverse(), m_original); + VERIFY_IS_APPROX(MapLeftPerm(lv.data(),lv.size()).inverse()*m_permuted*MapRightPerm(rv.data(),rv.size()).inverse(), m_original); + + VERIFY((lp*lp.inverse()).toDenseMatrix().isIdentity()); + VERIFY((lv.asPermutation()*lv.asPermutation().inverse()).toDenseMatrix().isIdentity()); + VERIFY((MapLeftPerm(lv.data(),lv.size())*MapLeftPerm(lv.data(),lv.size()).inverse()).toDenseMatrix().isIdentity()); + + LeftPermutationVectorType lv2; + randomPermutationVector(lv2, rows); + LeftPermutationType lp2(lv2); + Matrix lm2(lp2); + VERIFY_IS_APPROX((lp*lp2).toDenseMatrix().template cast(), lm*lm2); + VERIFY_IS_APPROX((lv.asPermutation()*lv2.asPermutation()).toDenseMatrix().template cast(), lm*lm2); + VERIFY_IS_APPROX((MapLeftPerm(lv.data(),lv.size())*MapLeftPerm(lv2.data(),lv2.size())).toDenseMatrix().template cast(), lm*lm2); + + LeftPermutationType identityp; + identityp.setIdentity(rows); + VERIFY_IS_APPROX(m_original, identityp*m_original); + + // check inplace permutations + m_permuted = m_original; + VERIFY_EVALUATION_COUNT(m_permuted.noalias()= lp.inverse() * m_permuted, one_if_dynamic); // 1 temp to allocate the mask + VERIFY_IS_APPROX(m_permuted, lp.inverse()*m_original); + + m_permuted = m_original; + VERIFY_EVALUATION_COUNT(m_permuted.noalias() = m_permuted * rp.inverse(), one_if_dynamic); // 1 temp to allocate the mask + VERIFY_IS_APPROX(m_permuted, m_original*rp.inverse()); + + m_permuted = m_original; + 
VERIFY_EVALUATION_COUNT(m_permuted.noalias() = lp * m_permuted, one_if_dynamic); // 1 temp to allocate the mask + VERIFY_IS_APPROX(m_permuted, lp*m_original); + + m_permuted = m_original; + VERIFY_EVALUATION_COUNT(m_permuted.noalias() = m_permuted * rp, one_if_dynamic); // 1 temp to allocate the mask + VERIFY_IS_APPROX(m_permuted, m_original*rp); + + if(rows>1 && cols>1) + { + lp2 = lp; + Index i = internal::random(0, rows-1); + Index j; + do j = internal::random(0, rows-1); while(j==i); + lp2.applyTranspositionOnTheLeft(i, j); + lm = lp; + lm.row(i).swap(lm.row(j)); + VERIFY_IS_APPROX(lm, lp2.toDenseMatrix().template cast()); + + RightPermutationType rp2 = rp; + i = internal::random(0, cols-1); + do j = internal::random(0, cols-1); while(j==i); + rp2.applyTranspositionOnTheRight(i, j); + rm = rp; + rm.col(i).swap(rm.col(j)); + VERIFY_IS_APPROX(rm, rp2.toDenseMatrix().template cast()); + } +} + +template +void bug890() +{ + typedef Matrix MatrixType; + typedef Matrix VectorType; + typedef Stride S; + typedef Map MapType; + typedef PermutationMatrix Perm; + + VectorType v1(2), v2(2), op(4), rhs(2); + v1 << 666,667; + op << 1,0,0,1; + rhs << 42,42; + + Perm P(2); + P.indices() << 1, 0; + + MapType(v1.data(),2,1,S(1,1)) = P * MapType(rhs.data(),2,1,S(1,1)); + VERIFY_IS_APPROX(v1, (P * rhs).eval()); + + MapType(v1.data(),2,1,S(1,1)) = P.inverse() * MapType(rhs.data(),2,1,S(1,1)); + VERIFY_IS_APPROX(v1, (P.inverse() * rhs).eval()); +} + +void test_permutationmatrices() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( permutationmatrices(Matrix()) ); + CALL_SUBTEST_2( permutationmatrices(Matrix3f()) ); + CALL_SUBTEST_3( permutationmatrices(Matrix()) ); + CALL_SUBTEST_4( permutationmatrices(Matrix4d()) ); + CALL_SUBTEST_5( permutationmatrices(Matrix()) ); + CALL_SUBTEST_6( permutationmatrices(Matrix(20, 30)) ); + CALL_SUBTEST_7( permutationmatrices(MatrixXcf(15, 10)) ); + } + CALL_SUBTEST_5( bug890() ); +} diff --git a/thirdparty/eigen/test/prec_inverse_4x4.cpp 
b/thirdparty/eigen/test/prec_inverse_4x4.cpp new file mode 100644 index 000000000..eb6ad18c9 --- /dev/null +++ b/thirdparty/eigen/test/prec_inverse_4x4.cpp @@ -0,0 +1,83 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include + +template void inverse_permutation_4x4() +{ + typedef typename MatrixType::Scalar Scalar; + Vector4i indices(0,1,2,3); + for(int i = 0; i < 24; ++i) + { + MatrixType m = PermutationMatrix<4>(indices); + MatrixType inv = m.inverse(); + double error = double( (m*inv-MatrixType::Identity()).norm() / NumTraits::epsilon() ); + EIGEN_DEBUG_VAR(error) + VERIFY(error == 0.0); + std::next_permutation(indices.data(),indices.data()+4); + } +} + +template void inverse_general_4x4(int repeat) +{ + using std::abs; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + double error_sum = 0., error_max = 0.; + for(int i = 0; i < repeat; ++i) + { + MatrixType m; + RealScalar absdet; + do { + m = MatrixType::Random(); + absdet = abs(m.determinant()); + } while(absdet < NumTraits::epsilon()); + MatrixType inv = m.inverse(); + double error = double( (m*inv-MatrixType::Identity()).norm() * absdet / NumTraits::epsilon() ); + error_sum += error; + error_max = (std::max)(error_max, error); + } + std::cerr << "inverse_general_4x4, Scalar = " << type_name() << std::endl; + double error_avg = error_sum / repeat; + EIGEN_DEBUG_VAR(error_avg); + EIGEN_DEBUG_VAR(error_max); + // FIXME that 1.25 used to be a 1.0 until the NumTraits changes on 28 April 2010, what's going wrong?? + // FIXME that 1.25 used to be 1.2 until we tested gcc 4.1 on 30 June 2010 and got 1.21. 
+ VERIFY(error_avg < (NumTraits::IsComplex ? 8.0 : 1.25)); + VERIFY(error_max < (NumTraits::IsComplex ? 64.0 : 20.0)); + + { + int s = 5;//internal::random(4,10); + int i = 0;//internal::random(0,s-4); + int j = 0;//internal::random(0,s-4); + Matrix mat(s,s); + mat.setRandom(); + MatrixType submat = mat.template block<4,4>(i,j); + MatrixType mat_inv = mat.template block<4,4>(i,j).inverse(); + VERIFY_IS_APPROX(mat_inv, submat.inverse()); + mat.template block<4,4>(i,j) = submat.inverse(); + VERIFY_IS_APPROX(mat_inv, (mat.template block<4,4>(i,j))); + } +} + +void test_prec_inverse_4x4() +{ + CALL_SUBTEST_1((inverse_permutation_4x4())); + CALL_SUBTEST_1(( inverse_general_4x4(200000 * g_repeat) )); + CALL_SUBTEST_1(( inverse_general_4x4 >(200000 * g_repeat) )); + + CALL_SUBTEST_2((inverse_permutation_4x4 >())); + CALL_SUBTEST_2(( inverse_general_4x4 >(200000 * g_repeat) )); + CALL_SUBTEST_2(( inverse_general_4x4 >(200000 * g_repeat) )); + + CALL_SUBTEST_3((inverse_permutation_4x4())); + CALL_SUBTEST_3((inverse_general_4x4(50000 * g_repeat))); +} diff --git a/thirdparty/eigen/test/product.h b/thirdparty/eigen/test/product.h new file mode 100644 index 000000000..3b6511270 --- /dev/null +++ b/thirdparty/eigen/test/product.h @@ -0,0 +1,231 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include + +template +bool areNotApprox(const MatrixBase& m1, const MatrixBase& m2, typename Derived1::RealScalar epsilon = NumTraits::dummy_precision()) +{ + return !((m1-m2).cwiseAbs2().maxCoeff() < epsilon * epsilon + * (std::max)(m1.cwiseAbs2().maxCoeff(), m2.cwiseAbs2().maxCoeff())); +} + +template void product(const MatrixType& m) +{ + /* this test covers the following files: + Identity.h Product.h + */ + typedef typename MatrixType::Scalar Scalar; + typedef Matrix RowVectorType; + typedef Matrix ColVectorType; + typedef Matrix RowSquareMatrixType; + typedef Matrix ColSquareMatrixType; + typedef Matrix OtherMajorMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + // this test relies a lot on Random.h, and there's not much more that we can do + // to test it, hence I consider that we will have tested Random.h + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols); + RowSquareMatrixType + identity = RowSquareMatrixType::Identity(rows, rows), + square = RowSquareMatrixType::Random(rows, rows), + res = RowSquareMatrixType::Random(rows, rows); + ColSquareMatrixType + square2 = ColSquareMatrixType::Random(cols, cols), + res2 = ColSquareMatrixType::Random(cols, cols); + RowVectorType v1 = RowVectorType::Random(rows); + ColVectorType vc2 = ColVectorType::Random(cols), vcres(cols); + OtherMajorMatrixType tm1 = m1; + + Scalar s1 = internal::random(); + + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1), + c2 = internal::random(0, cols-1); + + // begin testing Product.h: only associativity for now + // (we use Transpose.h but this doesn't count as a test for it) + VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2)); + m3 = m1; + m3 *= m1.transpose() * m2; + VERIFY_IS_APPROX(m3, m1 * (m1.transpose()*m2)); + VERIFY_IS_APPROX(m3, m1 * (m1.transpose()*m2)); + + // continue testing Product.h: distributivity + VERIFY_IS_APPROX(square*(m1 + m2), 
square*m1+square*m2); + VERIFY_IS_APPROX(square*(m1 - m2), square*m1-square*m2); + + // continue testing Product.h: compatibility with ScalarMultiple.h + VERIFY_IS_APPROX(s1*(square*m1), (s1*square)*m1); + VERIFY_IS_APPROX(s1*(square*m1), square*(m1*s1)); + + // test Product.h together with Identity.h + VERIFY_IS_APPROX(v1, identity*v1); + VERIFY_IS_APPROX(v1.transpose(), v1.transpose() * identity); + // again, test operator() to check const-qualification + VERIFY_IS_APPROX(MatrixType::Identity(rows, cols)(r,c), static_cast(r==c)); + + if (rows!=cols) + VERIFY_RAISES_ASSERT(m3 = m1*m1); + + // test the previous tests were not screwed up because operator* returns 0 + // (we use the more accurate default epsilon) + if (!NumTraits::IsInteger && (std::min)(rows,cols)>1) + { + VERIFY(areNotApprox(m1.transpose()*m2,m2.transpose()*m1)); + } + + // test optimized operator+= path + res = square; + res.noalias() += m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + if (!NumTraits::IsInteger && (std::min)(rows,cols)>1) + { + VERIFY(areNotApprox(res,square + m2 * m1.transpose())); + } + vcres = vc2; + vcres.noalias() += m1.transpose() * v1; + VERIFY_IS_APPROX(vcres, vc2 + m1.transpose() * v1); + + // test optimized operator-= path + res = square; + res.noalias() -= m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - (m1 * m2.transpose())); + if (!NumTraits::IsInteger && (std::min)(rows,cols)>1) + { + VERIFY(areNotApprox(res,square - m2 * m1.transpose())); + } + vcres = vc2; + vcres.noalias() -= m1.transpose() * v1; + VERIFY_IS_APPROX(vcres, vc2 - m1.transpose() * v1); + + // test d ?= a+b*c rules + res.noalias() = square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + res.noalias() += square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square + m1 * m2.transpose())); + res.noalias() -= square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + + // test d ?= a-b*c rules + 
res.noalias() = square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - m1 * m2.transpose()); + res.noalias() += square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square - m1 * m2.transpose())); + res.noalias() -= square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - m1 * m2.transpose()); + + + tm1 = m1; + VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1); + VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1); + + // test submatrix and matrix/vector product + for (int i=0; i::IsInteger && (std::min)(rows,cols)>1) + { + VERIFY(areNotApprox(res2,square2 + m2.transpose() * m1)); + } + + VERIFY_IS_APPROX(res.col(r).noalias() = square.adjoint() * square.col(r), (square.adjoint() * square.col(r)).eval()); + VERIFY_IS_APPROX(res.col(r).noalias() = square * square.col(r), (square * square.col(r)).eval()); + + // vector at runtime (see bug 1166) + { + RowSquareMatrixType ref(square); + ColSquareMatrixType ref2(square2); + ref = res = square; + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.col(0).transpose() * square.transpose(), (ref.row(0) = m1.col(0).transpose() * square.transpose())); + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.block(0,0,rows,1).transpose() * square.transpose(), (ref.row(0) = m1.col(0).transpose() * square.transpose())); + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.col(0).transpose() * square, (ref.row(0) = m1.col(0).transpose() * square)); + VERIFY_IS_APPROX(res.block(0,0,1,rows).noalias() = m1.block(0,0,rows,1).transpose() * square, (ref.row(0) = m1.col(0).transpose() * square)); + ref2 = res2 = square2; + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.row(0) * square2.transpose(), (ref2.row(0) = m1.row(0) * square2.transpose())); + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2.transpose(), (ref2.row(0) = m1.row(0) * square2.transpose())); + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.row(0) * square2, (ref2.row(0) = m1.row(0) 
* square2)); + VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2, (ref2.row(0) = m1.row(0) * square2)); + } + + // vector.block() (see bug 1283) + { + RowVectorType w1(rows); + VERIFY_IS_APPROX(square * v1.block(0,0,rows,1), square * v1); + VERIFY_IS_APPROX(w1.noalias() = square * v1.block(0,0,rows,1), square * v1); + VERIFY_IS_APPROX(w1.block(0,0,rows,1).noalias() = square * v1.block(0,0,rows,1), square * v1); + + Matrix w2(cols); + VERIFY_IS_APPROX(vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + + vc2 = square2.block(0,0,1,cols).transpose(); + VERIFY_IS_APPROX(square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + + vc2 = square2.block(0,0,cols,1); + VERIFY_IS_APPROX(square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + } + + // inner product + { + Scalar x = square2.row(c) * square2.col(c2); + VERIFY_IS_APPROX(x, square2.row(c).transpose().cwiseProduct(square2.col(c2)).sum()); + } + + // outer product + { + VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose()); + VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), 
m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols)); + } + + // Aliasing + { + ColVectorType x(cols); x.setRandom(); + ColVectorType z(x); + ColVectorType y(cols); y.setZero(); + ColSquareMatrixType A(cols,cols); A.setRandom(); + // CwiseBinaryOp + VERIFY_IS_APPROX(x = y + A*x, A*z); + x = z; + // CwiseUnaryOp + VERIFY_IS_APPROX(x = Scalar(1.)*(A*x), A*z); + } + + // regression for blas_trais + { + VERIFY_IS_APPROX(square * (square*square).transpose(), square * square.transpose() * square.transpose()); + VERIFY_IS_APPROX(square * (-(square*square)), -square * square * square); + VERIFY_IS_APPROX(square * (s1*(square*square)), s1 * square * square * square); + VERIFY_IS_APPROX(square * (square*square).conjugate(), square * square.conjugate() * square.conjugate()); + } + +} diff --git a/thirdparty/eigen/test/product_extra.cpp b/thirdparty/eigen/test/product_extra.cpp new file mode 100644 index 000000000..e2b855bff --- /dev/null +++ b/thirdparty/eigen/test/product_extra.cpp @@ -0,0 +1,375 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void product_extra(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix RowVectorType; + typedef Matrix ColVectorType; + typedef Matrix OtherMajorMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + mzero = MatrixType::Zero(rows, cols), + identity = MatrixType::Identity(rows, rows), + square = MatrixType::Random(rows, rows), + res = MatrixType::Random(rows, rows), + square2 = MatrixType::Random(cols, cols), + res2 = MatrixType::Random(cols, cols); + RowVectorType v1 = RowVectorType::Random(rows), vrres(rows); + ColVectorType vc2 = ColVectorType::Random(cols), vcres(cols); + OtherMajorMatrixType tm1 = m1; + + Scalar s1 = internal::random(), + s2 = internal::random(), + s3 = internal::random(); + + VERIFY_IS_APPROX(m3.noalias() = m1 * m2.adjoint(), m1 * m2.adjoint().eval()); + VERIFY_IS_APPROX(m3.noalias() = m1.adjoint() * square.adjoint(), m1.adjoint().eval() * square.adjoint().eval()); + VERIFY_IS_APPROX(m3.noalias() = m1.adjoint() * m2, m1.adjoint().eval() * m2); + VERIFY_IS_APPROX(m3.noalias() = (s1 * m1.adjoint()) * m2, (s1 * m1.adjoint()).eval() * m2); + VERIFY_IS_APPROX(m3.noalias() = ((s1 * m1).adjoint()) * m2, (numext::conj(s1) * m1.adjoint()).eval() * m2); + VERIFY_IS_APPROX(m3.noalias() = (- m1.adjoint() * s1) * (s3 * m2), (- m1.adjoint() * s1).eval() * (s3 * m2).eval()); + VERIFY_IS_APPROX(m3.noalias() = (s2 * m1.adjoint() * s1) * m2, (s2 * m1.adjoint() * s1).eval() * m2); + VERIFY_IS_APPROX(m3.noalias() = (-m1*s2) * s1*m2.adjoint(), (-m1*s2).eval() * (s1*m2.adjoint()).eval()); + + // a very tricky case where a scale factor has to be automatically conjugated: + VERIFY_IS_APPROX( m1.adjoint() * (s1*m2).conjugate(), (m1.adjoint()).eval() * ((s1*m2).conjugate()).eval()); + + + // test all possible conjugate combinations for the 
four matrix-vector product cases: + + VERIFY_IS_APPROX((-m1.conjugate() * s2) * (s1 * vc2), + (-m1.conjugate()*s2).eval() * (s1 * vc2).eval()); + VERIFY_IS_APPROX((-m1 * s2) * (s1 * vc2.conjugate()), + (-m1*s2).eval() * (s1 * vc2.conjugate()).eval()); + VERIFY_IS_APPROX((-m1.conjugate() * s2) * (s1 * vc2.conjugate()), + (-m1.conjugate()*s2).eval() * (s1 * vc2.conjugate()).eval()); + + VERIFY_IS_APPROX((s1 * vc2.transpose()) * (-m1.adjoint() * s2), + (s1 * vc2.transpose()).eval() * (-m1.adjoint()*s2).eval()); + VERIFY_IS_APPROX((s1 * vc2.adjoint()) * (-m1.transpose() * s2), + (s1 * vc2.adjoint()).eval() * (-m1.transpose()*s2).eval()); + VERIFY_IS_APPROX((s1 * vc2.adjoint()) * (-m1.adjoint() * s2), + (s1 * vc2.adjoint()).eval() * (-m1.adjoint()*s2).eval()); + + VERIFY_IS_APPROX((-m1.adjoint() * s2) * (s1 * v1.transpose()), + (-m1.adjoint()*s2).eval() * (s1 * v1.transpose()).eval()); + VERIFY_IS_APPROX((-m1.transpose() * s2) * (s1 * v1.adjoint()), + (-m1.transpose()*s2).eval() * (s1 * v1.adjoint()).eval()); + VERIFY_IS_APPROX((-m1.adjoint() * s2) * (s1 * v1.adjoint()), + (-m1.adjoint()*s2).eval() * (s1 * v1.adjoint()).eval()); + + VERIFY_IS_APPROX((s1 * v1) * (-m1.conjugate() * s2), + (s1 * v1).eval() * (-m1.conjugate()*s2).eval()); + VERIFY_IS_APPROX((s1 * v1.conjugate()) * (-m1 * s2), + (s1 * v1.conjugate()).eval() * (-m1*s2).eval()); + VERIFY_IS_APPROX((s1 * v1.conjugate()) * (-m1.conjugate() * s2), + (s1 * v1.conjugate()).eval() * (-m1.conjugate()*s2).eval()); + + VERIFY_IS_APPROX((-m1.adjoint() * s2) * (s1 * v1.adjoint()), + (-m1.adjoint()*s2).eval() * (s1 * v1.adjoint()).eval()); + + // test the vector-matrix product with non aligned starts + Index i = internal::random(0,m1.rows()-2); + Index j = internal::random(0,m1.cols()-2); + Index r = internal::random(1,m1.rows()-i); + Index c = internal::random(1,m1.cols()-j); + Index i2 = internal::random(0,m1.rows()-1); + Index j2 = internal::random(0,m1.cols()-1); + + VERIFY_IS_APPROX(m1.col(j2).adjoint() * 
m1.block(0,j,m1.rows(),c), m1.col(j2).adjoint().eval() * m1.block(0,j,m1.rows(),c).eval()); + VERIFY_IS_APPROX(m1.block(i,0,r,m1.cols()) * m1.row(i2).adjoint(), m1.block(i,0,r,m1.cols()).eval() * m1.row(i2).adjoint().eval()); + + // regression test + MatrixType tmp = m1 * m1.adjoint() * s1; + VERIFY_IS_APPROX(tmp, m1 * m1.adjoint() * s1); + + // regression test for bug 1343, assignment to arrays + Array a1 = m1 * vc2; + VERIFY_IS_APPROX(a1.matrix(),m1*vc2); + Array a2 = s1 * (m1 * vc2); + VERIFY_IS_APPROX(a2.matrix(),s1*m1*vc2); + Array a3 = v1 * m1; + VERIFY_IS_APPROX(a3.matrix(),v1*m1); + Array a4 = m1 * m2.adjoint(); + VERIFY_IS_APPROX(a4.matrix(),m1*m2.adjoint()); +} + +// Regression test for bug reported at http://forum.kde.org/viewtopic.php?f=74&t=96947 +void mat_mat_scalar_scalar_product() +{ + Eigen::Matrix2Xd dNdxy(2, 3); + dNdxy << -0.5, 0.5, 0, + -0.3, 0, 0.3; + double det = 6.0, wt = 0.5; + VERIFY_IS_APPROX(dNdxy.transpose()*dNdxy*det*wt, det*wt*dNdxy.transpose()*dNdxy); +} + +template +void zero_sized_objects(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + const int PacketSize = internal::packet_traits::size; + const int PacketSize1 = PacketSize>1 ? 
PacketSize-1 : 1; + Index rows = m.rows(); + Index cols = m.cols(); + + { + MatrixType res, a(rows,0), b(0,cols); + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(rows,cols) ); + VERIFY_IS_APPROX( (res=a*a.transpose()), MatrixType::Zero(rows,rows) ); + VERIFY_IS_APPROX( (res=b.transpose()*b), MatrixType::Zero(cols,cols) ); + VERIFY_IS_APPROX( (res=b.transpose()*a.transpose()), MatrixType::Zero(cols,rows) ); + } + + { + MatrixType res, a(rows,cols), b(cols,0); + res = a*b; + VERIFY(res.rows()==rows && res.cols()==0); + b.resize(0,rows); + res = b*a; + VERIFY(res.rows()==0 && res.cols()==cols); + } + + { + Matrix a; + Matrix b; + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) ); + } + + { + Matrix a; + Matrix b; + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) ); + } + + { + Matrix a(PacketSize,0); + Matrix b(0,1); + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize,1) ); + } + + { + Matrix a(PacketSize1,0); + Matrix b(0,1); + Matrix res; + VERIFY_IS_APPROX( (res=a*b), MatrixType::Zero(PacketSize1,1) ); + VERIFY_IS_APPROX( (res=a.lazyProduct(b)), MatrixType::Zero(PacketSize1,1) ); + } +} + +template +void bug_127() +{ + // Bug 127 + // + // a product of the form lhs*rhs with + // + // lhs: + // rows = 1, cols = 4 + // RowsAtCompileTime = 1, ColsAtCompileTime = -1 + // MaxRowsAtCompileTime = 1, MaxColsAtCompileTime = 5 + // + // rhs: + // rows = 4, cols = 0 + // RowsAtCompileTime = -1, ColsAtCompileTime = -1 + // MaxRowsAtCompileTime = 5, MaxColsAtCompileTime = 1 + // + // was failing on a runtime assertion, because it had been mis-compiled as a dot product because Product.h was using the + // max-sizes to detect size 1 indicating vectors, and that didn't account 
for 0-sized object with max-size 1. + + Matrix a(1,4); + Matrix b(4,0); + a*b; +} + +template void bug_817() +{ + ArrayXXf B = ArrayXXf::Random(10,10), C; + VectorXf x = VectorXf::Random(10); + C = (x.transpose()*B.matrix()); + B = (x.transpose()*B.matrix()); + VERIFY_IS_APPROX(B,C); +} + +template +void unaligned_objects() +{ + // Regression test for the bug reported here: + // http://forum.kde.org/viewtopic.php?f=74&t=107541 + // Recall the matrix*vector kernel avoid unaligned loads by loading two packets and then reassemble then. + // There was a mistake in the computation of the valid range for fully unaligned objects: in some rare cases, + // memory was read outside the allocated matrix memory. Though the values were not used, this might raise segfault. + for(int m=450;m<460;++m) + { + for(int n=8;n<12;++n) + { + MatrixXf M(m, n); + VectorXf v1(n), r1(500); + RowVectorXf v2(m), r2(16); + + M.setRandom(); + v1.setRandom(); + v2.setRandom(); + for(int o=0; o<4; ++o) + { + r1.segment(o,m).noalias() = M * v1; + VERIFY_IS_APPROX(r1.segment(o,m), M * MatrixXf(v1)); + r2.segment(o,n).noalias() = v2 * M; + VERIFY_IS_APPROX(r2.segment(o,n), MatrixXf(v2) * M); + } + } + } +} + +template +EIGEN_DONT_INLINE +Index test_compute_block_size(Index m, Index n, Index k) +{ + Index mc(m), nc(n), kc(k); + internal::computeProductBlockingSizes(kc, mc, nc); + return kc+mc+nc; +} + +template +Index compute_block_size() +{ + Index ret = 0; + ret += test_compute_block_size(0,1,1); + ret += test_compute_block_size(1,0,1); + ret += test_compute_block_size(1,1,0); + ret += test_compute_block_size(0,0,1); + ret += test_compute_block_size(0,1,0); + ret += test_compute_block_size(1,0,0); + ret += test_compute_block_size(0,0,0); + return ret; +} + +template +void aliasing_with_resize() +{ + Index m = internal::random(10,50); + Index n = internal::random(10,50); + MatrixXd A, B, C(m,n), D(m,m); + VectorXd a, b, c(n); + C.setRandom(); + D.setRandom(); + c.setRandom(); + double s = 
internal::random(1,10); + + A = C; + B = A * A.transpose(); + A = A * A.transpose(); + VERIFY_IS_APPROX(A,B); + + A = C; + B = (A * A.transpose())/s; + A = (A * A.transpose())/s; + VERIFY_IS_APPROX(A,B); + + A = C; + B = (A * A.transpose()) + D; + A = (A * A.transpose()) + D; + VERIFY_IS_APPROX(A,B); + + A = C; + B = D + (A * A.transpose()); + A = D + (A * A.transpose()); + VERIFY_IS_APPROX(A,B); + + A = C; + B = s * (A * A.transpose()); + A = s * (A * A.transpose()); + VERIFY_IS_APPROX(A,B); + + A = C; + a = c; + b = (A * a)/s; + a = (A * a)/s; + VERIFY_IS_APPROX(a,b); +} + +template +void bug_1308() +{ + int n = 10; + MatrixXd r(n,n); + VectorXd v = VectorXd::Random(n); + r = v * RowVectorXd::Ones(n); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n)); + r = VectorXd::Ones(n) * v.transpose(); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n).transpose()); + + Matrix4d ones44 = Matrix4d::Ones(); + Matrix4d m44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(m44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + + typedef Matrix RMatrix4d; + RMatrix4d r44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(r44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*RMatrix4d::Ones(), 
Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + +// RowVector4d r4; + m44.setOnes(); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.row(0).transpose() * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.col(0) * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.row(0), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.col(0).transpose(), ones44); +} + +void test_product_extra() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( product_extra(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( product_extra(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( mat_mat_scalar_scalar_product() ); + CALL_SUBTEST_3( product_extra(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + CALL_SUBTEST_5( bug_127<0>() ); + CALL_SUBTEST_5( bug_817<0>() ); + CALL_SUBTEST_5( bug_1308<0>() ); + CALL_SUBTEST_6( unaligned_objects<0>() ); + CALL_SUBTEST_7( compute_block_size() ); + CALL_SUBTEST_7( compute_block_size() ); + CALL_SUBTEST_7( compute_block_size >() ); + CALL_SUBTEST_8( aliasing_with_resize() ); + +} diff --git a/thirdparty/eigen/test/product_large.cpp b/thirdparty/eigen/test/product_large.cpp new file mode 100644 index 000000000..845cd40ca --- /dev/null +++ b/thirdparty/eigen/test/product_large.cpp @@ -0,0 +1,107 @@ +// This file is part of Eigen, 
a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "product.h" + +template +void test_aliasing() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + typedef Matrix MatrixType; + typedef Matrix VectorType; + VectorType x(cols); x.setRandom(); + VectorType z(x); + VectorType y(rows); y.setZero(); + MatrixType A(rows,cols); A.setRandom(); + // CwiseBinaryOp + VERIFY_IS_APPROX(x = y + A*x, A*z); // OK because "y + A*x" is marked as "assume-aliasing" + x = z; + // CwiseUnaryOp + VERIFY_IS_APPROX(x = T(1.)*(A*x), A*z); // OK because 1*(A*x) is replaced by (1*A*x) which is a Product<> expression + x = z; + // VERIFY_IS_APPROX(x = y-A*x, -A*z); // Not OK in 3.3 because x is resized before A*x gets evaluated + x = z; +} + +void test_product_large() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( product(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( product(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_3( product(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_4( product(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_5( product(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + + CALL_SUBTEST_1( test_aliasing() ); + } + +#if defined EIGEN_TEST_PART_6 + { + // test a specific issue in DiagonalProduct + int N = 1000000; + VectorXf v = VectorXf::Ones(N); + MatrixXf m = MatrixXf::Ones(N,3); + m = (v+v).asDiagonal() * m; + VERIFY_IS_APPROX(m, MatrixXf::Constant(N,3,2)); + } + 
+ { + // test deferred resizing in Matrix::operator= + MatrixXf a = MatrixXf::Random(10,4), b = MatrixXf::Random(4,10), c = a; + VERIFY_IS_APPROX((a = a * b), (c * b).eval()); + } + + { + // check the functions to setup blocking sizes compile and do not segfault + // FIXME check they do what they are supposed to do !! + std::ptrdiff_t l1 = internal::random(10000,20000); + std::ptrdiff_t l2 = internal::random(100000,200000); + std::ptrdiff_t l3 = internal::random(1000000,2000000); + setCpuCacheSizes(l1,l2,l3); + VERIFY(l1==l1CacheSize()); + VERIFY(l2==l2CacheSize()); + std::ptrdiff_t k1 = internal::random(10,100)*16; + std::ptrdiff_t m1 = internal::random(10,100)*16; + std::ptrdiff_t n1 = internal::random(10,100)*16; + // only makes sure it compiles fine + internal::computeProductBlockingSizes(k1,m1,n1,1); + } + + { + // test regression in row-vector by matrix (bad Map type) + MatrixXf mat1(10,32); mat1.setRandom(); + MatrixXf mat2(32,32); mat2.setRandom(); + MatrixXf r1 = mat1.row(2)*mat2.transpose(); + VERIFY_IS_APPROX(r1, (mat1.row(2)*mat2.transpose()).eval()); + + MatrixXf r2 = mat1.row(2)*mat2; + VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); + } + + { + Eigen::MatrixXd A(10,10), B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } +#endif + + // Regression test for bug 714: +#if defined EIGEN_HAS_OPENMP + omp_set_dynamic(1); + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_6( product(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } +#endif +} diff --git a/thirdparty/eigen/test/product_mmtr.cpp b/thirdparty/eigen/test/product_mmtr.cpp new file mode 100644 
index 000000000..b66529acd --- /dev/null +++ b/thirdparty/eigen/test/product_mmtr.cpp @@ -0,0 +1,76 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#define CHECK_MMTR(DEST, TRI, OP) { \ + ref2 = ref1 = DEST; \ + DEST.template triangularView() OP; \ + ref1 OP; \ + ref2.template triangularView() \ + = ref1.template triangularView(); \ + VERIFY_IS_APPROX(DEST,ref2); \ + } + +template void mmtr(int size) +{ + typedef Matrix MatrixColMaj; + typedef Matrix MatrixRowMaj; + + DenseIndex othersize = internal::random(1,200); + + MatrixColMaj matc = MatrixColMaj::Zero(size, size); + MatrixRowMaj matr = MatrixRowMaj::Zero(size, size); + MatrixColMaj ref1(size, size), ref2(size, size); + + MatrixColMaj soc(size,othersize); soc.setRandom(); + MatrixColMaj osc(othersize,size); osc.setRandom(); + MatrixRowMaj sor(size,othersize); sor.setRandom(); + MatrixRowMaj osr(othersize,size); osr.setRandom(); + MatrixColMaj sqc(size,size); sqc.setRandom(); + MatrixRowMaj sqr(size,size); sqr.setRandom(); + + Scalar s = internal::random(); + + CHECK_MMTR(matc, Lower, = s*soc*sor.adjoint()); + CHECK_MMTR(matc, Upper, = s*(soc*soc.adjoint())); + CHECK_MMTR(matr, Lower, = s*soc*soc.adjoint()); + CHECK_MMTR(matr, Upper, = soc*(s*sor.adjoint())); + + CHECK_MMTR(matc, Lower, += s*soc*soc.adjoint()); + CHECK_MMTR(matc, Upper, += s*(soc*sor.transpose())); + CHECK_MMTR(matr, Lower, += s*sor*soc.adjoint()); + CHECK_MMTR(matr, Upper, += soc*(s*soc.adjoint())); + + CHECK_MMTR(matc, Lower, -= s*soc*soc.adjoint()); + CHECK_MMTR(matc, Upper, -= s*(osc.transpose()*osc.conjugate())); + CHECK_MMTR(matr, Lower, -= s*soc*soc.adjoint()); + CHECK_MMTR(matr, Upper, -= soc*(s*soc.adjoint())); + + 
CHECK_MMTR(matc, Lower, -= s*sqr*sqc.template triangularView()); + CHECK_MMTR(matc, Upper, = s*sqc*sqr.template triangularView()); + CHECK_MMTR(matc, Lower, += s*sqr*sqc.template triangularView()); + CHECK_MMTR(matc, Upper, = s*sqc*sqc.template triangularView()); + + CHECK_MMTR(matc, Lower, = (s*sqr).template triangularView()*sqc); + CHECK_MMTR(matc, Upper, -= (s*sqc).template triangularView()*sqc); + CHECK_MMTR(matc, Lower, = (s*sqr).template triangularView()*sqc); + CHECK_MMTR(matc, Upper, += (s*sqc).template triangularView()*sqc); +} + +void test_product_mmtr() +{ + for(int i = 0; i < g_repeat ; i++) + { + CALL_SUBTEST_1((mmtr(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_2((mmtr(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_3((mmtr >(internal::random(1,EIGEN_TEST_MAX_SIZE/2)))); + CALL_SUBTEST_4((mmtr >(internal::random(1,EIGEN_TEST_MAX_SIZE/2)))); + } +} diff --git a/thirdparty/eigen/test/product_notemporary.cpp b/thirdparty/eigen/test/product_notemporary.cpp new file mode 100644 index 000000000..2bb19a681 --- /dev/null +++ b/thirdparty/eigen/test/product_notemporary.cpp @@ -0,0 +1,155 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define TEST_ENABLE_TEMPORARY_TRACKING + +#include "main.h" + +template void product_notemporary(const MatrixType& m) +{ + /* This test checks the number of temporaries created + * during the evaluation of a complex expression */ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix RowVectorType; + typedef Matrix ColVectorType; + typedef Matrix ColMajorMatrixType; + typedef Matrix RowMajorMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + ColMajorMatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols); + RowVectorType rv1 = RowVectorType::Random(rows), rvres(rows); + ColVectorType cv1 = ColVectorType::Random(cols), cvres(cols); + RowMajorMatrixType rm3(rows, cols); + + Scalar s1 = internal::random(), + s2 = internal::random(), + s3 = internal::random(); + + Index c0 = internal::random(4,cols-8), + c1 = internal::random(8,cols-c0), + r0 = internal::random(4,cols-8), + r1 = internal::random(8,rows-r0); + + VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()), 1); + VERIFY_EVALUATION_COUNT( m3 = (m1 * m2.adjoint()).transpose(), 1); + VERIFY_EVALUATION_COUNT( m3.noalias() = m1 * m2.adjoint(), 0); + + VERIFY_EVALUATION_COUNT( m3 = s1 * (m1 * m2.transpose()), 1); +// VERIFY_EVALUATION_COUNT( m3 = m3 + s1 * (m1 * m2.transpose()), 1); + VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0); + + VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()), 1); + + VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()).transpose(), 1); + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 - m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 - m1 * m2.transpose(), 0); + 
VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 - m1 * m2.transpose(), 0); + + VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1); + VERIFY_EVALUATION_COUNT( m3.noalias() = (s1 * m1).adjoint() * s2 * m2, 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += s1 * (-m1*s3).adjoint() * (s2 * m2 * s3), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= s1 * (m1.transpose() * m2), 0); + + VERIFY_EVALUATION_COUNT(( m3.block(r0,r0,r1,r1).noalias() += -m1.block(r0,c0,r1,c1) * (s2*m2.block(r0,c0,r1,c1)).adjoint() ), 0); + VERIFY_EVALUATION_COUNT(( m3.block(r0,r0,r1,r1).noalias() -= s1 * m1.block(r0,c0,r1,c1) * m2.block(c0,r0,c1,r1) ), 0); + + // NOTE this is because the Block expression is not handled yet by our expression analyser + VERIFY_EVALUATION_COUNT(( m3.block(r0,r0,r1,r1).noalias() = s1 * m1.block(r0,c0,r1,c1) * (s1*m2).block(c0,r0,c1,r1) ), 1); + + VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).template triangularView() * m2, 0); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * (m2+m2), 1); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * m2.adjoint(), 0); + + VERIFY_EVALUATION_COUNT( m3.template triangularView() = (m1 * m2.adjoint()), 0); + VERIFY_EVALUATION_COUNT( m3.template triangularView() -= (m1 * m2.adjoint()), 0); + + // NOTE this is because the blas_traits require innerstride==1 to avoid a temporary, but that doesn't seem to be actually needed for the triangular products + VERIFY_EVALUATION_COUNT( rm3.col(c0).noalias() = (s1 * m1.adjoint()).template triangularView() * (s2*m2.row(c0)).adjoint(), 1); + + VERIFY_EVALUATION_COUNT( m1.template triangularView().solveInPlace(m3), 0); + VERIFY_EVALUATION_COUNT( m1.adjoint().template triangularView().solveInPlace(m3.transpose()), 0); + + VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).adjoint().template selfadjointView() * 
(-m2*s3).adjoint(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = s2 * m2.adjoint() * (s1 * m1.adjoint()).template selfadjointView(), 0); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template selfadjointView() * m2.adjoint(), 0); + + // NOTE this is because the blas_traits require innerstride==1 to avoid a temporary, but that doesn't seem to be actually needed for the triangular products + VERIFY_EVALUATION_COUNT( m3.col(c0).noalias() = (s1 * m1).adjoint().template selfadjointView() * (-m2.row(c0)*s3).adjoint(), 1); + VERIFY_EVALUATION_COUNT( m3.col(c0).noalias() -= (s1 * m1).adjoint().template selfadjointView() * (-m2.row(c0)*s3).adjoint(), 1); + + VERIFY_EVALUATION_COUNT( m3.block(r0,c0,r1,c1).noalias() += m1.block(r0,r0,r1,r1).template selfadjointView() * (s1*m2.block(r0,c0,r1,c1)), 0); + VERIFY_EVALUATION_COUNT( m3.block(r0,c0,r1,c1).noalias() = m1.block(r0,r0,r1,r1).template selfadjointView() * m2.block(r0,c0,r1,c1), 0); + + VERIFY_EVALUATION_COUNT( m3.template selfadjointView().rankUpdate(m2.adjoint()), 0); + + // Here we will get 1 temporary for each resize operation of the lhs operator; resize(r1,c1) would lead to zero temporaries + m3.resize(1,1); + VERIFY_EVALUATION_COUNT( m3.noalias() = m1.block(r0,r0,r1,r1).template selfadjointView() * m2.block(r0,c0,r1,c1), 1); + m3.resize(1,1); + VERIFY_EVALUATION_COUNT( m3.noalias() = m1.block(r0,r0,r1,r1).template triangularView() * m2.block(r0,c0,r1,c1), 1); + + // Zero temporaries for lazy products ... + VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose().lazyProduct(m3)).diagonal().sum(), 0 ); + + // ... and even no temporary for even deeply (>=2) nested products + VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose() * m3).diagonal().sum(), 0 ); + VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose() * m3).diagonal().array().abs().sum(), 0 ); + + // Zero temporaries for ... 
CoeffBasedProductMode + VERIFY_EVALUATION_COUNT( m3.col(0).template head<5>() * m3.col(0).transpose() + m3.col(0).template head<5>() * m3.col(0).transpose(), 0 ); + + // Check matrix * vectors + VERIFY_EVALUATION_COUNT( cvres.noalias() = m1 * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.col(0), 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * rv1.adjoint(), 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.row(0).transpose(), 0 ); + + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 ); + + // Check outer products + m3 = cv1 * rv1; + VERIFY_EVALUATION_COUNT( m3.noalias() = cv1 * rv1, 0 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (cv1+cv1) * (rv1+rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() += (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 ); +} + +void test_product_notemporary() +{ + int s; + for(int i = 0; i < g_repeat; i++) { + s = internal::random(16,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( product_notemporary(MatrixXf(s, s)) ); + CALL_SUBTEST_2( product_notemporary(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(16,EIGEN_TEST_MAX_SIZE/2); + CALL_SUBTEST_3( product_notemporary(MatrixXcf(s,s)) ); + CALL_SUBTEST_4( product_notemporary(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } +} diff --git a/thirdparty/eigen/test/product_selfadjoint.cpp b/thirdparty/eigen/test/product_selfadjoint.cpp new file 
mode 100644 index 000000000..3d768aa7e --- /dev/null +++ b/thirdparty/eigen/test/product_selfadjoint.cpp @@ -0,0 +1,87 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void product_selfadjoint(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix RowVectorType; + + typedef Matrix RhsMatrixType; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3; + VectorType v1 = VectorType::Random(rows), + v2 = VectorType::Random(rows), + v3(rows); + RowVectorType r1 = RowVectorType::Random(rows), + r2 = RowVectorType::Random(rows); + RhsMatrixType m4 = RhsMatrixType::Random(rows,10); + + Scalar s1 = internal::random(), + s2 = internal::random(), + s3 = internal::random(); + + m1 = (m1.adjoint() + m1).eval(); + + // rank2 update + m2 = m1.template triangularView(); + m2.template selfadjointView().rankUpdate(v1,v2); + VERIFY_IS_APPROX(m2, (m1 + v1 * v2.adjoint()+ v2 * v1.adjoint()).template triangularView().toDenseMatrix()); + + m2 = m1.template triangularView(); + m2.template selfadjointView().rankUpdate(-v1,s2*v2,s3); + VERIFY_IS_APPROX(m2, (m1 + (s3*(-v1)*(s2*v2).adjoint()+numext::conj(s3)*(s2*v2)*(-v1).adjoint())).template triangularView().toDenseMatrix()); + + m2 = m1.template triangularView(); + m2.template selfadjointView().rankUpdate(-s2*r1.adjoint(),r2.adjoint()*s3,s1); + VERIFY_IS_APPROX(m2, (m1 + s1*(-s2*r1.adjoint())*(r2.adjoint()*s3).adjoint() + numext::conj(s1)*(r2.adjoint()*s3) * (-s2*r1.adjoint()).adjoint()).template 
triangularView().toDenseMatrix()); + + if (rows>1) + { + m2 = m1.template triangularView(); + m2.block(1,1,rows-1,cols-1).template selfadjointView().rankUpdate(v1.tail(rows-1),v2.head(cols-1)); + m3 = m1; + m3.block(1,1,rows-1,cols-1) += v1.tail(rows-1) * v2.head(cols-1).adjoint()+ v2.head(cols-1) * v1.tail(rows-1).adjoint(); + VERIFY_IS_APPROX(m2, m3.template triangularView().toDenseMatrix()); + } +} + +void test_product_selfadjoint() +{ + int s = 0; + for(int i = 0; i < g_repeat ; i++) { + CALL_SUBTEST_1( product_selfadjoint(Matrix()) ); + CALL_SUBTEST_2( product_selfadjoint(Matrix()) ); + CALL_SUBTEST_3( product_selfadjoint(Matrix3d()) ); + + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); + CALL_SUBTEST_4( product_selfadjoint(MatrixXcf(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); + CALL_SUBTEST_5( product_selfadjoint(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_6( product_selfadjoint(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_7( product_selfadjoint(Matrix(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } +} diff --git a/thirdparty/eigen/test/product_small.cpp b/thirdparty/eigen/test/product_small.cpp new file mode 100644 index 000000000..fdfdd9f6c --- /dev/null +++ b/thirdparty/eigen/test/product_small.cpp @@ -0,0 +1,293 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_STATIC_ASSERT +#include "product.h" +#include + +// regression test for bug 447 +template +void product1x1() +{ + Matrix matAstatic; + Matrix matBstatic; + matAstatic.setRandom(); + matBstatic.setRandom(); + VERIFY_IS_APPROX( (matAstatic * matBstatic).coeff(0,0), + matAstatic.cwiseProduct(matBstatic.transpose()).sum() ); + + MatrixXf matAdynamic(1,3); + MatrixXf matBdynamic(3,1); + matAdynamic.setRandom(); + matBdynamic.setRandom(); + VERIFY_IS_APPROX( (matAdynamic * matBdynamic).coeff(0,0), + matAdynamic.cwiseProduct(matBdynamic.transpose()).sum() ); +} + +template +const TC& ref_prod(TC &C, const TA &A, const TB &B) +{ + for(Index i=0;i +typename internal::enable_if::type +test_lazy_single(int rows, int cols, int depth) +{ + Matrix A(rows,depth); A.setRandom(); + Matrix B(depth,cols); B.setRandom(); + Matrix C(rows,cols); C.setRandom(); + Matrix D(C); + VERIFY_IS_APPROX(C+=A.lazyProduct(B), ref_prod(D,A,B)); +} + +template +typename internal::enable_if< ( (Rows ==1&&Depth!=1&&OA==ColMajor) + || (Depth==1&&Rows !=1&&OA==RowMajor) + || (Cols ==1&&Depth!=1&&OB==RowMajor) + || (Depth==1&&Cols !=1&&OB==ColMajor) + || (Rows ==1&&Cols !=1&&OC==ColMajor) + || (Cols ==1&&Rows !=1&&OC==RowMajor)),void>::type +test_lazy_single(int, int, int) +{ +} + +template +void test_lazy_all_layout(int rows=Rows, int cols=Cols, int depth=Depth) +{ + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); +} + +template +void test_lazy_l1() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // Inner + CALL_SUBTEST(( 
test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,1,depth) )); + + // Outer + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(7,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,cols) )); +} + +template +void test_lazy_l2() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // mat-vec + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,1,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,1,depth) )); + + // vec-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols) )); + CALL_SUBTEST(( 
test_lazy_all_layout(1,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols,depth) )); +} + +template +void test_lazy_l3() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + // mat-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,3,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,6,depth) )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(8,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(3,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols,depth) )); +} + +template +void test_linear_but_not_vectorizable() +{ + // Check tricky cases for which the result of the product is a vector and thus must exhibit the LinearBit flag, + // but is not vectorizable along the linear dimension. + Index n = N==Dynamic ? internal::random(1,32) : N; + Index m = M==Dynamic ? internal::random(1,32) : M; + Index k = K==Dynamic ? 
internal::random(1,32) : K; + + { + Matrix A; A.setRandom(n,m+1); + Matrix B; B.setRandom(m*2,k); + Matrix C; + Matrix R; + + C.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()); + R.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()).eval(); + VERIFY_IS_APPROX(C,R); + } + + { + Matrix A; A.setRandom(m+1,n); + Matrix B; B.setRandom(k,m*2); + Matrix C; + Matrix R; + + C.noalias() = (B.template leftCols()+B.template rightCols()) * A.template topLeftCorner(); + R.noalias() = (B.template leftCols()+B.template rightCols()).eval() * A.template topLeftCorner(); + VERIFY_IS_APPROX(C,R); + } +} + +template +void bug_1311() +{ + Matrix< double, Rows, 2 > A; A.setRandom(); + Vector2d b = Vector2d::Random() ; + Matrix res; + res.noalias() = 1. * (A * b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = 1.*A * b; + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); +} + +void test_product_small() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( product(Matrix()) ); + CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_8( product(Matrix()) ); + CALL_SUBTEST_3( product(Matrix3d()) ); + CALL_SUBTEST_4( product(Matrix4d()) ); + CALL_SUBTEST_5( product(Matrix4f()) ); + CALL_SUBTEST_6( product1x1<0>() ); + + CALL_SUBTEST_11( test_lazy_l1() ); + CALL_SUBTEST_12( test_lazy_l2() ); + CALL_SUBTEST_13( test_lazy_l3() ); + + CALL_SUBTEST_21( test_lazy_l1() ); + CALL_SUBTEST_22( test_lazy_l2() ); + CALL_SUBTEST_23( test_lazy_l3() ); + + CALL_SUBTEST_31( test_lazy_l1 >() ); + CALL_SUBTEST_32( test_lazy_l2 >() ); + CALL_SUBTEST_33( test_lazy_l3 >() ); + + CALL_SUBTEST_41( test_lazy_l1 >() ); + CALL_SUBTEST_42( test_lazy_l2 >() ); + CALL_SUBTEST_43( test_lazy_l3 >() ); + + CALL_SUBTEST_7(( 
test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + + CALL_SUBTEST_6( bug_1311<3>() ); + CALL_SUBTEST_6( bug_1311<5>() ); + } + +#ifdef EIGEN_TEST_PART_6 + { + // test compilation of (outer_product) * vector + Vector3f v = Vector3f::Random(); + VERIFY_IS_APPROX( (v * v.transpose()) * v, (v * v.transpose()).eval() * v); + } + + { + // regression test for pull-request #93 + Eigen::Matrix A; A.setRandom(); + Eigen::Matrix B; B.setRandom(); + Eigen::Matrix C; C.setRandom(); + VERIFY_IS_APPROX(B * A.inverse(), B * A.inverse()[0]); + VERIFY_IS_APPROX(A.inverse() * C, A.inverse()[0] * C); + } + + { + Eigen::Matrix A, B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } +#endif +} diff --git a/thirdparty/eigen/test/product_symm.cpp b/thirdparty/eigen/test/product_symm.cpp new file mode 100644 index 000000000..74d7329b1 --- /dev/null +++ b/thirdparty/eigen/test/product_symm.cpp @@ -0,0 +1,94 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void symm(int size = Size, int othersize = OtherSize) +{ + typedef Matrix MatrixType; + typedef Matrix Rhs1; + typedef Matrix Rhs2; + enum { order = OtherSize==1 ? 
0 : RowMajor }; + typedef Matrix Rhs3; + typedef typename MatrixType::Index Index; + + Index rows = size; + Index cols = size; + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), m3; + + m1 = (m1+m1.adjoint()).eval(); + + Rhs1 rhs1 = Rhs1::Random(cols, othersize), rhs12(cols, othersize), rhs13(cols, othersize); + Rhs2 rhs2 = Rhs2::Random(othersize, rows), rhs22(othersize, rows), rhs23(othersize, rows); + Rhs3 rhs3 = Rhs3::Random(cols, othersize), rhs32(cols, othersize), rhs33(cols, othersize); + + Scalar s1 = internal::random(), + s2 = internal::random(); + + m2 = m1.template triangularView(); + m3 = m2.template selfadjointView(); + VERIFY_IS_EQUAL(m1, m3); + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView() * (s2*rhs1), + rhs13 = (s1*m1) * (s2*rhs1)); + + m2 = m1.template triangularView(); rhs12.setRandom(); rhs13 = rhs12; + m3 = m2.template selfadjointView(); + VERIFY_IS_EQUAL(m1, m3); + VERIFY_IS_APPROX(rhs12 += (s1*m2).template selfadjointView() * (s2*rhs1), + rhs13 += (s1*m1) * (s2*rhs1)); + + m2 = m1.template triangularView(); + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView() * (s2*rhs2.adjoint()), + rhs13 = (s1*m1) * (s2*rhs2.adjoint())); + + m2 = m1.template triangularView(); + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView() * (s2*rhs2.adjoint()), + rhs13 = (s1*m1) * (s2*rhs2.adjoint())); + + m2 = m1.template triangularView(); + VERIFY_IS_APPROX(rhs12 = (s1*m2.adjoint()).template selfadjointView() * (s2*rhs2.adjoint()), + rhs13 = (s1*m1.adjoint()) * (s2*rhs2.adjoint())); + + // test row major = <...> + m2 = m1.template triangularView(); rhs12.setRandom(); rhs13 = rhs12; + VERIFY_IS_APPROX(rhs12 -= (s1*m2).template selfadjointView() * (s2*rhs3), + rhs13 -= (s1*m1) * (s2 * rhs3)); + + m2 = m1.template triangularView(); + VERIFY_IS_APPROX(rhs12 = (s1*m2.adjoint()).template selfadjointView() * (s2*rhs3).conjugate(), + rhs13 = (s1*m1.adjoint()) * (s2*rhs3).conjugate()); + + + m2 = 
m1.template triangularView(); rhs13 = rhs12; + VERIFY_IS_APPROX(rhs12.noalias() += s1 * ((m2.adjoint()).template selfadjointView() * (s2*rhs3).conjugate()), + rhs13 += (s1*m1.adjoint()) * (s2*rhs3).conjugate()); + + m2 = m1.template triangularView(); + VERIFY_IS_APPROX(rhs22 = (rhs2) * (m2).template selfadjointView(), rhs23 = (rhs2) * (m1)); + VERIFY_IS_APPROX(rhs22 = (s2*rhs2) * (s1*m2).template selfadjointView(), rhs23 = (s2*rhs2) * (s1*m1)); + +} + +void test_product_symm() +{ + for(int i = 0; i < g_repeat ; i++) + { + CALL_SUBTEST_1(( symm(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + CALL_SUBTEST_2(( symm(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + CALL_SUBTEST_3(( symm,Dynamic,Dynamic>(internal::random(1,EIGEN_TEST_MAX_SIZE/2),internal::random(1,EIGEN_TEST_MAX_SIZE/2)) )); + CALL_SUBTEST_4(( symm,Dynamic,Dynamic>(internal::random(1,EIGEN_TEST_MAX_SIZE/2),internal::random(1,EIGEN_TEST_MAX_SIZE/2)) )); + + CALL_SUBTEST_5(( symm(internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + CALL_SUBTEST_6(( symm(internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + CALL_SUBTEST_7(( symm,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + CALL_SUBTEST_8(( symm,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)) )); + } +} diff --git a/thirdparty/eigen/test/product_syrk.cpp b/thirdparty/eigen/test/product_syrk.cpp new file mode 100644 index 000000000..e10f0f2f2 --- /dev/null +++ b/thirdparty/eigen/test/product_syrk.cpp @@ -0,0 +1,136 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void syrk(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef Matrix RMatrixType; + typedef Matrix Rhs1; + typedef Matrix Rhs2; + typedef Matrix Rhs3; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3 = MatrixType::Random(rows, cols); + RMatrixType rm2 = MatrixType::Random(rows, cols); + + Rhs1 rhs1 = Rhs1::Random(internal::random(1,320), cols); Rhs1 rhs11 = Rhs1::Random(rhs1.rows(), cols); + Rhs2 rhs2 = Rhs2::Random(rows, internal::random(1,320)); Rhs2 rhs22 = Rhs2::Random(rows, rhs2.cols()); + Rhs3 rhs3 = Rhs3::Random(internal::random(1,320), rows); + + Scalar s1 = internal::random(); + + Index c = internal::random(0,cols-1); + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(rhs2,s1)._expression()), + ((s1 * rhs2 * rhs2.adjoint()).eval().template triangularView().toDenseMatrix())); + m2.setZero(); + VERIFY_IS_APPROX(((m2.template triangularView() += s1 * rhs2 * rhs22.adjoint()).nestedExpression()), + ((s1 * rhs2 * rhs22.adjoint()).eval().template triangularView().toDenseMatrix())); + + + m2.setZero(); + VERIFY_IS_APPROX(m2.template selfadjointView().rankUpdate(rhs2,s1)._expression(), + (s1 * rhs2 * rhs2.adjoint()).eval().template triangularView().toDenseMatrix()); + m2.setZero(); + VERIFY_IS_APPROX((m2.template triangularView() += s1 * rhs22 * rhs2.adjoint()).nestedExpression(), + (s1 * rhs22 * rhs2.adjoint()).eval().template triangularView().toDenseMatrix()); + + + m2.setZero(); + VERIFY_IS_APPROX(m2.template selfadjointView().rankUpdate(rhs1.adjoint(),s1)._expression(), + (s1 * rhs1.adjoint() * rhs1).eval().template triangularView().toDenseMatrix()); + m2.setZero(); + VERIFY_IS_APPROX((m2.template triangularView() += s1 * rhs11.adjoint() * rhs1).nestedExpression(), + (s1 * rhs11.adjoint() * rhs1).eval().template 
triangularView().toDenseMatrix()); + + + m2.setZero(); + VERIFY_IS_APPROX(m2.template selfadjointView().rankUpdate(rhs1.adjoint(),s1)._expression(), + (s1 * rhs1.adjoint() * rhs1).eval().template triangularView().toDenseMatrix()); + VERIFY_IS_APPROX((m2.template triangularView() = s1 * rhs1.adjoint() * rhs11).nestedExpression(), + (s1 * rhs1.adjoint() * rhs11).eval().template triangularView().toDenseMatrix()); + + + m2.setZero(); + VERIFY_IS_APPROX(m2.template selfadjointView().rankUpdate(rhs3.adjoint(),s1)._expression(), + (s1 * rhs3.adjoint() * rhs3).eval().template triangularView().toDenseMatrix()); + + m2.setZero(); + VERIFY_IS_APPROX(m2.template selfadjointView().rankUpdate(rhs3.adjoint(),s1)._expression(), + (s1 * rhs3.adjoint() * rhs3).eval().template triangularView().toDenseMatrix()); + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(m1.col(c),s1)._expression()), + ((s1 * m1.col(c) * m1.col(c).adjoint()).eval().template triangularView().toDenseMatrix())); + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(m1.col(c),s1)._expression()), + ((s1 * m1.col(c) * m1.col(c).adjoint()).eval().template triangularView().toDenseMatrix())); + rm2.setZero(); + VERIFY_IS_APPROX((rm2.template selfadjointView().rankUpdate(m1.col(c),s1)._expression()), + ((s1 * m1.col(c) * m1.col(c).adjoint()).eval().template triangularView().toDenseMatrix())); + m2.setZero(); + VERIFY_IS_APPROX((m2.template triangularView() += s1 * m3.col(c) * m1.col(c).adjoint()).nestedExpression(), + ((s1 * m3.col(c) * m1.col(c).adjoint()).eval().template triangularView().toDenseMatrix())); + rm2.setZero(); + VERIFY_IS_APPROX((rm2.template triangularView() += s1 * m1.col(c) * m3.col(c).adjoint()).nestedExpression(), + ((s1 * m1.col(c) * m3.col(c).adjoint()).eval().template triangularView().toDenseMatrix())); + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(m1.col(c).conjugate(),s1)._expression()), + ((s1 * 
m1.col(c).conjugate() * m1.col(c).conjugate().adjoint()).eval().template triangularView().toDenseMatrix())); + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(m1.col(c).conjugate(),s1)._expression()), + ((s1 * m1.col(c).conjugate() * m1.col(c).conjugate().adjoint()).eval().template triangularView().toDenseMatrix())); + + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(m1.row(c),s1)._expression()), + ((s1 * m1.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView().toDenseMatrix())); + rm2.setZero(); + VERIFY_IS_APPROX((rm2.template selfadjointView().rankUpdate(m1.row(c),s1)._expression()), + ((s1 * m1.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView().toDenseMatrix())); + m2.setZero(); + VERIFY_IS_APPROX((m2.template triangularView() += s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).nestedExpression(), + ((s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView().toDenseMatrix())); + rm2.setZero(); + VERIFY_IS_APPROX((rm2.template triangularView() += s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).nestedExpression(), + ((s1 * m3.row(c).transpose() * m1.row(c).transpose().adjoint()).eval().template triangularView().toDenseMatrix())); + + + m2.setZero(); + VERIFY_IS_APPROX((m2.template selfadjointView().rankUpdate(m1.row(c).adjoint(),s1)._expression()), + ((s1 * m1.row(c).adjoint() * m1.row(c).adjoint().adjoint()).eval().template triangularView().toDenseMatrix())); +} + +void test_product_syrk() +{ + for(int i = 0; i < g_repeat ; i++) + { + int s; + s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( syrk(MatrixXf(s, s)) ); + CALL_SUBTEST_2( syrk(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); + CALL_SUBTEST_3( syrk(MatrixXcf(s, s)) ); + CALL_SUBTEST_4( syrk(MatrixXcd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + 
} +} diff --git a/thirdparty/eigen/test/product_trmm.cpp b/thirdparty/eigen/test/product_trmm.cpp new file mode 100644 index 000000000..12e554410 --- /dev/null +++ b/thirdparty/eigen/test/product_trmm.cpp @@ -0,0 +1,115 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template +int get_random_size() +{ + const int factor = NumTraits::ReadCost; + const int max_test_size = EIGEN_TEST_MAX_SIZE>2*factor ? EIGEN_TEST_MAX_SIZE/factor : EIGEN_TEST_MAX_SIZE; + return internal::random(1,max_test_size); +} + +template +void trmm(int rows=get_random_size(), + int cols=get_random_size(), + int otherCols = OtherCols==Dynamic?get_random_size():OtherCols) +{ + typedef Matrix TriMatrix; + typedef Matrix OnTheRight; + typedef Matrix OnTheLeft; + + typedef Matrix ResXS; + typedef Matrix ResSX; + + TriMatrix mat(rows,cols), tri(rows,cols), triTr(cols,rows); + + OnTheRight ge_right(cols,otherCols); + OnTheLeft ge_left(otherCols,rows); + ResSX ge_sx, ge_sx_save; + ResXS ge_xs, ge_xs_save; + + Scalar s1 = internal::random(), + s2 = internal::random(); + + mat.setRandom(); + tri = mat.template triangularView(); + triTr = mat.transpose().template triangularView(); + ge_right.setRandom(); + ge_left.setRandom(); + + VERIFY_IS_APPROX( ge_xs = mat.template triangularView() * ge_right, tri * ge_right); + VERIFY_IS_APPROX( ge_sx = ge_left * mat.template triangularView(), ge_left * tri); + + VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView() * ge_right, tri * ge_right); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_left * mat.template triangularView(), ge_left * tri); + + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * 
(s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView(), ge_right.transpose() * triTr.conjugate()); + + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()), s1*triTr.conjugate() * (s2*ge_left.adjoint())); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView(), ge_right.adjoint() * triTr.conjugate()); + + ge_xs_save = ge_xs; + VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()) ); + ge_sx.setRandom(); + ge_sx_save = ge_sx; + VERIFY_IS_APPROX( ge_sx_save - (ge_right.adjoint() * (-s1 * triTr).conjugate()).eval(), ge_sx.noalias() -= (ge_right.adjoint() * (-s1 * mat).adjoint().template triangularView()).eval()); + + VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint()); + + // TODO check with sub-matrix expressions ? 
+} + +template +void trmv(int rows=get_random_size(), int cols=get_random_size()) +{ + trmm(rows,cols,1); +} + +template +void trmm(int rows=get_random_size(), int cols=get_random_size(), int otherCols = get_random_size()) +{ + trmm(rows,cols,otherCols); +} + +#define CALL_ALL_ORDERS(NB,SCALAR,MODE) \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + EIGEN_CAT(CALL_SUBTEST_,NB)((trmm())); \ + \ + EIGEN_CAT(CALL_SUBTEST_1,NB)((trmv())); \ + EIGEN_CAT(CALL_SUBTEST_1,NB)((trmv())); + + +#define CALL_ALL(NB,SCALAR) \ + CALL_ALL_ORDERS(EIGEN_CAT(1,NB),SCALAR,Upper) \ + CALL_ALL_ORDERS(EIGEN_CAT(2,NB),SCALAR,UnitUpper) \ + CALL_ALL_ORDERS(EIGEN_CAT(3,NB),SCALAR,StrictlyUpper) \ + CALL_ALL_ORDERS(EIGEN_CAT(1,NB),SCALAR,Lower) \ + CALL_ALL_ORDERS(EIGEN_CAT(2,NB),SCALAR,UnitLower) \ + CALL_ALL_ORDERS(EIGEN_CAT(3,NB),SCALAR,StrictlyLower) + + +void test_product_trmm() +{ + for(int i = 0; i < g_repeat ; i++) + { + CALL_ALL(1,float); // EIGEN_SUFFIXES;11;111;21;121;31;131 + CALL_ALL(2,double); // EIGEN_SUFFIXES;12;112;22;122;32;132 + CALL_ALL(3,std::complex); // EIGEN_SUFFIXES;13;113;23;123;33;133 + CALL_ALL(4,std::complex); // EIGEN_SUFFIXES;14;114;24;124;34;134 + } +} diff --git a/thirdparty/eigen/test/product_trmv.cpp b/thirdparty/eigen/test/product_trmv.cpp new file mode 100644 index 000000000..57a202afc --- /dev/null +++ b/thirdparty/eigen/test/product_trmv.cpp @@ -0,0 +1,91 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void trmv(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + + RealScalar largerEps = 10*test_precision(); + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m3(rows, cols); + VectorType v1 = VectorType::Random(rows); + + Scalar s1 = internal::random(); + + m1 = MatrixType::Random(rows, cols); + + // check with a column-major matrix + m3 = m1.template triangularView(); + VERIFY((m3 * v1).isApprox(m1.template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3 * v1).isApprox(m1.template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3 * v1).isApprox(m1.template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3 * v1).isApprox(m1.template triangularView() * v1, largerEps)); + + // check conjugated and scalar multiple expressions (col-major) + m3 = m1.template triangularView(); + VERIFY(((s1*m3).conjugate() * v1).isApprox((s1*m1).conjugate().template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3.conjugate() * v1.conjugate()).isApprox(m1.conjugate().template triangularView() * v1.conjugate(), largerEps)); + + // check with a row-major matrix + m3 = m1.template triangularView(); + VERIFY((m3.transpose() * v1).isApprox(m1.transpose().template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3.transpose() * v1).isApprox(m1.transpose().template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3.transpose() * v1).isApprox(m1.transpose().template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + 
VERIFY((m3.transpose() * v1).isApprox(m1.transpose().template triangularView() * v1, largerEps)); + + // check conjugated and scalar multiple expressions (row-major) + m3 = m1.template triangularView(); + VERIFY((m3.adjoint() * v1).isApprox(m1.adjoint().template triangularView() * v1, largerEps)); + m3 = m1.template triangularView(); + VERIFY((m3.adjoint() * (s1*v1.conjugate())).isApprox(m1.adjoint().template triangularView() * (s1*v1.conjugate()), largerEps)); + m3 = m1.template triangularView(); + + // check transposed cases: + m3 = m1.template triangularView(); + VERIFY((v1.transpose() * m3).isApprox(v1.transpose() * m1.template triangularView(), largerEps)); + VERIFY((v1.adjoint() * m3).isApprox(v1.adjoint() * m1.template triangularView(), largerEps)); + VERIFY((v1.adjoint() * m3.adjoint()).isApprox(v1.adjoint() * m1.template triangularView().adjoint(), largerEps)); + + // TODO check with sub-matrices +} + +void test_product_trmv() +{ + int s = 0; + for(int i = 0; i < g_repeat ; i++) { + CALL_SUBTEST_1( trmv(Matrix()) ); + CALL_SUBTEST_2( trmv(Matrix()) ); + CALL_SUBTEST_3( trmv(Matrix3d()) ); + + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); + CALL_SUBTEST_4( trmv(MatrixXcf(s,s)) ); + CALL_SUBTEST_5( trmv(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + + s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_6( trmv(Matrix(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + } +} diff --git a/thirdparty/eigen/test/product_trsolve.cpp b/thirdparty/eigen/test/product_trsolve.cpp new file mode 100644 index 000000000..4b97fa9d6 --- /dev/null +++ b/thirdparty/eigen/test/product_trsolve.cpp @@ -0,0 +1,101 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#define VERIFY_TRSM(TRI,XB) { \ + (XB).setRandom(); ref = (XB); \ + (TRI).solveInPlace(XB); \ + VERIFY_IS_APPROX((TRI).toDenseMatrix() * (XB), ref); \ + (XB).setRandom(); ref = (XB); \ + (XB) = (TRI).solve(XB); \ + VERIFY_IS_APPROX((TRI).toDenseMatrix() * (XB), ref); \ + } + +#define VERIFY_TRSM_ONTHERIGHT(TRI,XB) { \ + (XB).setRandom(); ref = (XB); \ + (TRI).transpose().template solveInPlace(XB.transpose()); \ + VERIFY_IS_APPROX((XB).transpose() * (TRI).transpose().toDenseMatrix(), ref.transpose()); \ + (XB).setRandom(); ref = (XB); \ + (XB).transpose() = (TRI).transpose().template solve(XB.transpose()); \ + VERIFY_IS_APPROX((XB).transpose() * (TRI).transpose().toDenseMatrix(), ref.transpose()); \ + } + +template void trsolve(int size=Size,int cols=Cols) +{ + typedef typename NumTraits::Real RealScalar; + + Matrix cmLhs(size,size); + Matrix rmLhs(size,size); + + enum { colmajor = Size==1 ? RowMajor : ColMajor, + rowmajor = Cols==1 ? ColMajor : RowMajor }; + Matrix cmRhs(size,cols); + Matrix rmRhs(size,cols); + Matrix ref(size,cols); + + cmLhs.setRandom(); cmLhs *= static_cast(0.1); cmLhs.diagonal().array() += static_cast(1); + rmLhs.setRandom(); rmLhs *= static_cast(0.1); rmLhs.diagonal().array() += static_cast(1); + + VERIFY_TRSM(cmLhs.conjugate().template triangularView(), cmRhs); + VERIFY_TRSM(cmLhs.adjoint() .template triangularView(), cmRhs); + VERIFY_TRSM(cmLhs .template triangularView(), cmRhs); + VERIFY_TRSM(cmLhs .template triangularView(), rmRhs); + VERIFY_TRSM(cmLhs.conjugate().template triangularView(), rmRhs); + VERIFY_TRSM(cmLhs.adjoint() .template triangularView(), rmRhs); + + VERIFY_TRSM(cmLhs.conjugate().template triangularView(), cmRhs); + VERIFY_TRSM(cmLhs .template triangularView(), rmRhs); + + VERIFY_TRSM(rmLhs .template triangularView(), cmRhs); + VERIFY_TRSM(rmLhs.conjugate().template triangularView(), rmRhs); + + + VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView(), cmRhs); + 
VERIFY_TRSM_ONTHERIGHT(cmLhs .template triangularView(), cmRhs); + VERIFY_TRSM_ONTHERIGHT(cmLhs .template triangularView(), rmRhs); + VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView(), rmRhs); + + VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView(), cmRhs); + VERIFY_TRSM_ONTHERIGHT(cmLhs .template triangularView(), rmRhs); + + VERIFY_TRSM_ONTHERIGHT(rmLhs .template triangularView(), cmRhs); + VERIFY_TRSM_ONTHERIGHT(rmLhs.conjugate().template triangularView(), rmRhs); + + int c = internal::random(0,cols-1); + VERIFY_TRSM(rmLhs.template triangularView(), rmRhs.col(c)); + VERIFY_TRSM(cmLhs.template triangularView(), rmRhs.col(c)); +} + +void test_product_trsolve() +{ + for(int i = 0; i < g_repeat ; i++) + { + // matrices + CALL_SUBTEST_1((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_2((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_3((trsolve,Dynamic,Dynamic>(internal::random(1,EIGEN_TEST_MAX_SIZE/2),internal::random(1,EIGEN_TEST_MAX_SIZE/2)))); + CALL_SUBTEST_4((trsolve,Dynamic,Dynamic>(internal::random(1,EIGEN_TEST_MAX_SIZE/2),internal::random(1,EIGEN_TEST_MAX_SIZE/2)))); + + // vectors + CALL_SUBTEST_5((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_6((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_7((trsolve,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_8((trsolve,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + + // meta-unrollers + CALL_SUBTEST_9((trsolve())); + CALL_SUBTEST_10((trsolve())); + CALL_SUBTEST_11((trsolve,4,1>())); + CALL_SUBTEST_12((trsolve())); + CALL_SUBTEST_13((trsolve())); + CALL_SUBTEST_14((trsolve())); + + } +} diff --git a/thirdparty/eigen/test/qr.cpp b/thirdparty/eigen/test/qr.cpp new file mode 100644 index 000000000..dfcc1e8f9 --- /dev/null +++ b/thirdparty/eigen/test/qr.cpp @@ -0,0 +1,132 @@ +// This file is part of 
Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include + +template void qr(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + + Index rows = m.rows(); + Index cols = m.cols(); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix MatrixQType; + + MatrixType a = MatrixType::Random(rows,cols); + HouseholderQR qrOfA(a); + + MatrixQType q = qrOfA.householderQ(); + VERIFY_IS_UNITARY(q); + + MatrixType r = qrOfA.matrixQR().template triangularView(); + VERIFY_IS_APPROX(a, qrOfA.householderQ() * r); +} + +template void qr_fixedsize() +{ + enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; + typedef typename MatrixType::Scalar Scalar; + Matrix m1 = Matrix::Random(); + HouseholderQR > qr(m1); + + Matrix r = qr.matrixQR(); + // FIXME need better way to construct trapezoid + for(int i = 0; i < Rows; i++) for(int j = 0; j < Cols; j++) if(i>j) r(i,j) = Scalar(0); + + VERIFY_IS_APPROX(m1, qr.householderQ() * r); + + Matrix m2 = Matrix::Random(Cols,Cols2); + Matrix m3 = m1*m2; + m2 = Matrix::Random(Cols,Cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); +} + +template void qr_invertible() +{ + using std::log; + using std::abs; + using std::pow; + using std::max; + typedef typename NumTraits::Real RealScalar; + typedef typename MatrixType::Scalar Scalar; + + int size = internal::random(10,50); + + MatrixType m1(size, size), m2(size, size), m3(size, size); + m1 = MatrixType::Random(size,size); + + if (internal::is_same::value) + { + // let's build a matrix more stable to inverse + MatrixType a = MatrixType::Random(size,size*4); + m1 += a * a.adjoint(); + } + + HouseholderQR qr(m1); + m3 = MatrixType::Random(size,size); + m2 
= qr.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + + // now construct a matrix with prescribed determinant + m1.setZero(); + for(int i = 0; i < size; i++) m1(i,i) = internal::random(); + RealScalar absdet = abs(m1.diagonal().prod()); + m3 = qr.householderQ(); // get a unitary + m1 = m3 * m1 * m3; + qr.compute(m1); + VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant()); + // This test is tricky if the determinant becomes too small. + // Since we generate random numbers with magnitude rrange [0,1], the average determinant is 0.5^size + VERIFY_IS_MUCH_SMALLER_THAN( abs(absdet-qr.absDeterminant()), numext::maxi(RealScalar(pow(0.5,size)),numext::maxi(abs(absdet),abs(qr.absDeterminant()))) ); + +} + +template void qr_verify_assert() +{ + MatrixType tmp; + + HouseholderQR qr; + VERIFY_RAISES_ASSERT(qr.matrixQR()) + VERIFY_RAISES_ASSERT(qr.solve(tmp)) + VERIFY_RAISES_ASSERT(qr.householderQ()) + VERIFY_RAISES_ASSERT(qr.absDeterminant()) + VERIFY_RAISES_ASSERT(qr.logAbsDeterminant()) +} + +void test_qr() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( qr(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_2( qr(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2),internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); + CALL_SUBTEST_3(( qr_fixedsize, 2 >() )); + CALL_SUBTEST_4(( qr_fixedsize, 4 >() )); + CALL_SUBTEST_5(( qr_fixedsize, 7 >() )); + CALL_SUBTEST_11( qr(Matrix()) ); + } + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( qr_invertible() ); + CALL_SUBTEST_6( qr_invertible() ); + CALL_SUBTEST_7( qr_invertible() ); + CALL_SUBTEST_8( qr_invertible() ); + } + + CALL_SUBTEST_9(qr_verify_assert()); + CALL_SUBTEST_10(qr_verify_assert()); + CALL_SUBTEST_1(qr_verify_assert()); + CALL_SUBTEST_6(qr_verify_assert()); + CALL_SUBTEST_7(qr_verify_assert()); + CALL_SUBTEST_8(qr_verify_assert()); + + // Test problem size constructors + CALL_SUBTEST_12(HouseholderQR(10, 20)); +} diff --git 
a/thirdparty/eigen/test/qr_colpivoting.cpp b/thirdparty/eigen/test/qr_colpivoting.cpp new file mode 100644 index 000000000..26ed27f5c --- /dev/null +++ b/thirdparty/eigen/test/qr_colpivoting.cpp @@ -0,0 +1,342 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include + +template +void cod() { + typedef typename MatrixType::Index Index; + + Index rows = internal::random(2, EIGEN_TEST_MAX_SIZE); + Index cols = internal::random(2, EIGEN_TEST_MAX_SIZE); + Index cols2 = internal::random(2, EIGEN_TEST_MAX_SIZE); + Index rank = internal::random(1, (std::min)(rows, cols) - 1); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix + MatrixQType; + MatrixType matrix; + createRandomPIMatrixOfRank(rank, rows, cols, matrix); + CompleteOrthogonalDecomposition cod(matrix); + VERIFY(rank == cod.rank()); + VERIFY(cols - cod.rank() == cod.dimensionOfKernel()); + VERIFY(!cod.isInjective()); + VERIFY(!cod.isInvertible()); + VERIFY(!cod.isSurjective()); + + MatrixQType q = cod.householderQ(); + VERIFY_IS_UNITARY(q); + + MatrixType z = cod.matrixZ(); + VERIFY_IS_UNITARY(z); + + MatrixType t; + t.setZero(rows, cols); + t.topLeftCorner(rank, rank) = + cod.matrixT().topLeftCorner(rank, rank).template triangularView(); + + MatrixType c = q * t * z * cod.colsPermutation().inverse(); + VERIFY_IS_APPROX(matrix, c); + + MatrixType exact_solution = MatrixType::Random(cols, cols2); + MatrixType rhs = matrix * exact_solution; + MatrixType cod_solution = cod.solve(rhs); + VERIFY_IS_APPROX(rhs, matrix * cod_solution); + + // Verify that we get the same minimum-norm solution as the SVD. 
+ JacobiSVD svd(matrix, ComputeThinU | ComputeThinV); + MatrixType svd_solution = svd.solve(rhs); + VERIFY_IS_APPROX(cod_solution, svd_solution); + + MatrixType pinv = cod.pseudoInverse(); + VERIFY_IS_APPROX(cod_solution, pinv * rhs); +} + +template +void cod_fixedsize() { + enum { + Rows = MatrixType::RowsAtCompileTime, + Cols = MatrixType::ColsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + int rank = internal::random(1, (std::min)(int(Rows), int(Cols)) - 1); + Matrix matrix; + createRandomPIMatrixOfRank(rank, Rows, Cols, matrix); + CompleteOrthogonalDecomposition > cod(matrix); + VERIFY(rank == cod.rank()); + VERIFY(Cols - cod.rank() == cod.dimensionOfKernel()); + VERIFY(cod.isInjective() == (rank == Rows)); + VERIFY(cod.isSurjective() == (rank == Cols)); + VERIFY(cod.isInvertible() == (cod.isInjective() && cod.isSurjective())); + + Matrix exact_solution; + exact_solution.setRandom(Cols, Cols2); + Matrix rhs = matrix * exact_solution; + Matrix cod_solution = cod.solve(rhs); + VERIFY_IS_APPROX(rhs, matrix * cod_solution); + + // Verify that we get the same minimum-norm solution as the SVD. 
+ JacobiSVD svd(matrix, ComputeFullU | ComputeFullV); + Matrix svd_solution = svd.solve(rhs); + VERIFY_IS_APPROX(cod_solution, svd_solution); +} + +template void qr() +{ + using std::sqrt; + typedef typename MatrixType::Index Index; + + Index rows = internal::random(2,EIGEN_TEST_MAX_SIZE), cols = internal::random(2,EIGEN_TEST_MAX_SIZE), cols2 = internal::random(2,EIGEN_TEST_MAX_SIZE); + Index rank = internal::random(1, (std::min)(rows, cols)-1); + + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix MatrixQType; + MatrixType m1; + createRandomPIMatrixOfRank(rank,rows,cols,m1); + ColPivHouseholderQR qr(m1); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel()); + VERIFY(!qr.isInjective()); + VERIFY(!qr.isInvertible()); + VERIFY(!qr.isSurjective()); + + MatrixQType q = qr.householderQ(); + VERIFY_IS_UNITARY(q); + + MatrixType r = qr.matrixQR().template triangularView(); + MatrixType c = q * r * qr.colsPermutation().inverse(); + VERIFY_IS_APPROX(m1, c); + + // Verify that the absolute value of the diagonal elements in R are + // non-increasing until they reach the singularity threshold. 
+ RealScalar threshold = + sqrt(RealScalar(rows)) * numext::abs(r(0, 0)) * NumTraits::epsilon(); + for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) { + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); + if (x < threshold && y < threshold) continue; + if (!test_isApproxOrLessThan(y, x)) { + for (Index j = 0; j < (std::min)(rows, cols); ++j) { + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; + } + std::cout << "Failure at i=" << i << ", rank=" << rank + << ", threshold=" << threshold << std::endl; + } + VERIFY_IS_APPROX_OR_LESS_THAN(y, x); + } + + MatrixType m2 = MatrixType::Random(cols,cols2); + MatrixType m3 = m1*m2; + m2 = MatrixType::Random(cols,cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + + { + Index size = rows; + do { + m1 = MatrixType::Random(size,size); + qr.compute(m1); + } while(!qr.isInvertible()); + MatrixType m1_inv = qr.inverse(); + m3 = m1 * MatrixType::Random(size,cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m2, m1_inv*m3); + } +} + +template void qr_fixedsize() +{ + using std::sqrt; + using std::abs; + enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + int rank = internal::random(1, (std::min)(int(Rows), int(Cols))-1); + Matrix m1; + createRandomPIMatrixOfRank(rank,Rows,Cols,m1); + ColPivHouseholderQR > qr(m1); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(Cols - qr.rank(), qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(qr.isInjective(), (rank == Rows)); + VERIFY_IS_EQUAL(qr.isSurjective(), (rank == Cols)); + VERIFY_IS_EQUAL(qr.isInvertible(), (qr.isInjective() && qr.isSurjective())); + + Matrix r = qr.matrixQR().template triangularView(); + Matrix c = qr.householderQ() * r * qr.colsPermutation().inverse(); + VERIFY_IS_APPROX(m1, c); + + Matrix m2 = Matrix::Random(Cols,Cols2); + Matrix m3 = m1*m2; + m2 = 
Matrix::Random(Cols,Cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + // Verify that the absolute value of the diagonal elements in R are + // non-increasing until they reache the singularity threshold. + RealScalar threshold = + sqrt(RealScalar(Rows)) * (std::abs)(r(0, 0)) * NumTraits::epsilon(); + for (Index i = 0; i < (std::min)(int(Rows), int(Cols)) - 1; ++i) { + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); + if (x < threshold && y < threshold) continue; + if (!test_isApproxOrLessThan(y, x)) { + for (Index j = 0; j < (std::min)(int(Rows), int(Cols)); ++j) { + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; + } + std::cout << "Failure at i=" << i << ", rank=" << rank + << ", threshold=" << threshold << std::endl; + } + VERIFY_IS_APPROX_OR_LESS_THAN(y, x); + } +} + +// This test is meant to verify that pivots are chosen such that +// even for a graded matrix, the diagonal of R falls of roughly +// monotonically until it reaches the threshold for singularity. +// We use the so-called Kahan matrix, which is a famous counter-example +// for rank-revealing QR. See +// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf +// page 3 for more detail. 
+template void qr_kahan_matrix() +{ + using std::sqrt; + using std::abs; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Index rows = 300, cols = rows; + + MatrixType m1; + m1.setZero(rows,cols); + RealScalar s = std::pow(NumTraits::epsilon(), 1.0 / rows); + RealScalar c = std::sqrt(1 - s*s); + RealScalar pow_s_i(1.0); // pow(s,i) + for (Index i = 0; i < rows; ++i) { + m1(i, i) = pow_s_i; + m1.row(i).tail(rows - i - 1) = -pow_s_i * c * MatrixType::Ones(1, rows - i - 1); + pow_s_i *= s; + } + m1 = (m1 + m1.transpose()).eval(); + ColPivHouseholderQR qr(m1); + MatrixType r = qr.matrixQR().template triangularView(); + + RealScalar threshold = + std::sqrt(RealScalar(rows)) * numext::abs(r(0, 0)) * NumTraits::epsilon(); + for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) { + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); + if (x < threshold && y < threshold) continue; + if (!test_isApproxOrLessThan(y, x)) { + for (Index j = 0; j < (std::min)(rows, cols); ++j) { + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; + } + std::cout << "Failure at i=" << i << ", rank=" << qr.rank() + << ", threshold=" << threshold << std::endl; + } + VERIFY_IS_APPROX_OR_LESS_THAN(y, x); + } +} + +template void qr_invertible() +{ + using std::log; + using std::abs; + typedef typename NumTraits::Real RealScalar; + typedef typename MatrixType::Scalar Scalar; + + int size = internal::random(10,50); + + MatrixType m1(size, size), m2(size, size), m3(size, size); + m1 = MatrixType::Random(size,size); + + if (internal::is_same::value) + { + // let's build a matrix more stable to inverse + MatrixType a = MatrixType::Random(size,size*2); + m1 += a * a.adjoint(); + } + + ColPivHouseholderQR qr(m1); + m3 = MatrixType::Random(size,size); + m2 = qr.solve(m3); + //VERIFY_IS_APPROX(m3, m1*m2); + + // now construct a matrix with prescribed 
determinant + m1.setZero(); + for(int i = 0; i < size; i++) m1(i,i) = internal::random(); + RealScalar absdet = abs(m1.diagonal().prod()); + m3 = qr.householderQ(); // get a unitary + m1 = m3 * m1 * m3; + qr.compute(m1); + VERIFY_IS_APPROX(absdet, qr.absDeterminant()); + VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant()); +} + +template void qr_verify_assert() +{ + MatrixType tmp; + + ColPivHouseholderQR qr; + VERIFY_RAISES_ASSERT(qr.matrixQR()) + VERIFY_RAISES_ASSERT(qr.solve(tmp)) + VERIFY_RAISES_ASSERT(qr.householderQ()) + VERIFY_RAISES_ASSERT(qr.dimensionOfKernel()) + VERIFY_RAISES_ASSERT(qr.isInjective()) + VERIFY_RAISES_ASSERT(qr.isSurjective()) + VERIFY_RAISES_ASSERT(qr.isInvertible()) + VERIFY_RAISES_ASSERT(qr.inverse()) + VERIFY_RAISES_ASSERT(qr.absDeterminant()) + VERIFY_RAISES_ASSERT(qr.logAbsDeterminant()) +} + +void test_qr_colpivoting() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( qr() ); + CALL_SUBTEST_2( qr() ); + CALL_SUBTEST_3( qr() ); + CALL_SUBTEST_4(( qr_fixedsize, 4 >() )); + CALL_SUBTEST_5(( qr_fixedsize, 3 >() )); + CALL_SUBTEST_5(( qr_fixedsize, 1 >() )); + } + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( cod() ); + CALL_SUBTEST_2( cod() ); + CALL_SUBTEST_3( cod() ); + CALL_SUBTEST_4(( cod_fixedsize, 4 >() )); + CALL_SUBTEST_5(( cod_fixedsize, 3 >() )); + CALL_SUBTEST_5(( cod_fixedsize, 1 >() )); + } + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( qr_invertible() ); + CALL_SUBTEST_2( qr_invertible() ); + CALL_SUBTEST_6( qr_invertible() ); + CALL_SUBTEST_3( qr_invertible() ); + } + + CALL_SUBTEST_7(qr_verify_assert()); + CALL_SUBTEST_8(qr_verify_assert()); + CALL_SUBTEST_1(qr_verify_assert()); + CALL_SUBTEST_2(qr_verify_assert()); + CALL_SUBTEST_6(qr_verify_assert()); + CALL_SUBTEST_3(qr_verify_assert()); + + // Test problem size constructors + CALL_SUBTEST_9(ColPivHouseholderQR(10, 20)); + + CALL_SUBTEST_1( qr_kahan_matrix() ); + CALL_SUBTEST_2( qr_kahan_matrix() ); +} diff --git 
a/thirdparty/eigen/test/qr_fullpivoting.cpp b/thirdparty/eigen/test/qr_fullpivoting.cpp new file mode 100644 index 000000000..70e89c198 --- /dev/null +++ b/thirdparty/eigen/test/qr_fullpivoting.cpp @@ -0,0 +1,159 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include + +template void qr() +{ + typedef typename MatrixType::Index Index; + + Index max_size = EIGEN_TEST_MAX_SIZE; + Index min_size = numext::maxi(1,EIGEN_TEST_MAX_SIZE/10); + Index rows = internal::random(min_size,max_size), + cols = internal::random(min_size,max_size), + cols2 = internal::random(min_size,max_size), + rank = internal::random(1, (std::min)(rows, cols)-1); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix MatrixQType; + MatrixType m1; + createRandomPIMatrixOfRank(rank,rows,cols,m1); + FullPivHouseholderQR qr(m1); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel()); + VERIFY(!qr.isInjective()); + VERIFY(!qr.isInvertible()); + VERIFY(!qr.isSurjective()); + + MatrixType r = qr.matrixQR(); + + MatrixQType q = qr.matrixQ(); + VERIFY_IS_UNITARY(q); + + // FIXME need better way to construct trapezoid + for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0); + + MatrixType c = qr.matrixQ() * r * qr.colsPermutation().inverse(); + + VERIFY_IS_APPROX(m1, c); + + // stress the ReturnByValue mechanism + MatrixType tmp; + VERIFY_IS_APPROX(tmp.noalias() = qr.matrixQ() * r, (qr.matrixQ() * r).eval()); + + MatrixType m2 = MatrixType::Random(cols,cols2); + MatrixType m3 = m1*m2; + m2 = MatrixType::Random(cols,cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); 
+ + { + Index size = rows; + do { + m1 = MatrixType::Random(size,size); + qr.compute(m1); + } while(!qr.isInvertible()); + MatrixType m1_inv = qr.inverse(); + m3 = m1 * MatrixType::Random(size,cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m2, m1_inv*m3); + } +} + +template void qr_invertible() +{ + using std::log; + using std::abs; + typedef typename NumTraits::Real RealScalar; + typedef typename MatrixType::Scalar Scalar; + + Index max_size = numext::mini(50,EIGEN_TEST_MAX_SIZE); + Index min_size = numext::maxi(1,EIGEN_TEST_MAX_SIZE/10); + Index size = internal::random(min_size,max_size); + + MatrixType m1(size, size), m2(size, size), m3(size, size); + m1 = MatrixType::Random(size,size); + + if (internal::is_same::value) + { + // let's build a matrix more stable to inverse + MatrixType a = MatrixType::Random(size,size*2); + m1 += a * a.adjoint(); + } + + FullPivHouseholderQR qr(m1); + VERIFY(qr.isInjective()); + VERIFY(qr.isInvertible()); + VERIFY(qr.isSurjective()); + + m3 = MatrixType::Random(size,size); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + + // now construct a matrix with prescribed determinant + m1.setZero(); + for(int i = 0; i < size; i++) m1(i,i) = internal::random(); + RealScalar absdet = abs(m1.diagonal().prod()); + m3 = qr.matrixQ(); // get a unitary + m1 = m3 * m1 * m3; + qr.compute(m1); + VERIFY_IS_APPROX(absdet, qr.absDeterminant()); + VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant()); +} + +template void qr_verify_assert() +{ + MatrixType tmp; + + FullPivHouseholderQR qr; + VERIFY_RAISES_ASSERT(qr.matrixQR()) + VERIFY_RAISES_ASSERT(qr.solve(tmp)) + VERIFY_RAISES_ASSERT(qr.matrixQ()) + VERIFY_RAISES_ASSERT(qr.dimensionOfKernel()) + VERIFY_RAISES_ASSERT(qr.isInjective()) + VERIFY_RAISES_ASSERT(qr.isSurjective()) + VERIFY_RAISES_ASSERT(qr.isInvertible()) + VERIFY_RAISES_ASSERT(qr.inverse()) + VERIFY_RAISES_ASSERT(qr.absDeterminant()) + VERIFY_RAISES_ASSERT(qr.logAbsDeterminant()) +} + +void test_qr_fullpivoting() +{ + for(int i = 
0; i < 1; i++) { + // FIXME : very weird bug here +// CALL_SUBTEST(qr(Matrix2f()) ); + CALL_SUBTEST_1( qr() ); + CALL_SUBTEST_2( qr() ); + CALL_SUBTEST_3( qr() ); + } + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( qr_invertible() ); + CALL_SUBTEST_2( qr_invertible() ); + CALL_SUBTEST_4( qr_invertible() ); + CALL_SUBTEST_3( qr_invertible() ); + } + + CALL_SUBTEST_5(qr_verify_assert()); + CALL_SUBTEST_6(qr_verify_assert()); + CALL_SUBTEST_1(qr_verify_assert()); + CALL_SUBTEST_2(qr_verify_assert()); + CALL_SUBTEST_4(qr_verify_assert()); + CALL_SUBTEST_3(qr_verify_assert()); + + // Test problem size constructors + CALL_SUBTEST_7(FullPivHouseholderQR(10, 20)); + CALL_SUBTEST_7((FullPivHouseholderQR >(10,20))); + CALL_SUBTEST_7((FullPivHouseholderQR >(Matrix::Random()))); + CALL_SUBTEST_7((FullPivHouseholderQR >(20,10))); + CALL_SUBTEST_7((FullPivHouseholderQR >(Matrix::Random()))); +} diff --git a/thirdparty/eigen/test/qtvector.cpp b/thirdparty/eigen/test/qtvector.cpp new file mode 100644 index 000000000..2be885e48 --- /dev/null +++ b/thirdparty/eigen/test/qtvector.cpp @@ -0,0 +1,158 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_WORK_AROUND_QT_BUG_CALLING_WRONG_OPERATOR_NEW_FIXED_IN_QT_4_5 + +#include "main.h" +#include +#include +#include + +template +void check_qtvector_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + + Index rows = m.rows(); + Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + QVector v(10, MatrixType(rows,cols)), w(20, y); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], y); + } + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.fill(y,22); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + MatrixType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(int i=23; i +void check_qtvector_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + QVector v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.fill(y,22); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(TransformType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + TransformType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; int(i) +void check_qtvector_quaternion(const QuaternionType&) +{ + typedef typename 
QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); + QVector v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.fill(y,22); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(QuaternionType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + QuaternionType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; int(i) +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +typedef long long int64; + +template Scalar check_in_range(Scalar x, Scalar y) +{ + Scalar r = internal::random(x,y); + VERIFY(r>=x); + if(y>=x) + { + VERIFY(r<=y); + } + return r; +} + +template void check_all_in_range(Scalar x, Scalar y) +{ + Array mask(y-x+1); + mask.fill(0); + long n = (y-x+1)*32; + for(long k=0; k0).all() ); +} + +template void check_histogram(Scalar x, Scalar y, int bins) +{ + Array hist(bins); + hist.fill(0); + int f = 100000; + int n = bins*f; + int64 range = int64(y)-int64(x); + int divisor = int((range+1)/bins); + assert(((range+1)%bins)==0); + for(int k=0; k()/double(f))-1.0).abs()<0.02).all() ); +} + +void test_rand() +{ + long long_ref = NumTraits::highest()/10; + signed char char_offset = (std::min)(g_repeat,64); + signed char short_offset = (std::min)(g_repeat,16000); + + for(int i = 0; i < g_repeat*10000; i++) { + CALL_SUBTEST(check_in_range(10,11)); + CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); + CALL_SUBTEST(check_in_range(-1,1)); + 
CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); + + CALL_SUBTEST(check_in_range(10,11)); + CALL_SUBTEST(check_in_range(1.24234523,1.24234523)); + CALL_SUBTEST(check_in_range(-1,1)); + CALL_SUBTEST(check_in_range(-1432.2352,-1432.2352)); + + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(0,-1)); + CALL_SUBTEST(check_in_range(-673456,673456)); + CALL_SUBTEST(check_in_range(-RAND_MAX+10,RAND_MAX-10)); + CALL_SUBTEST(check_in_range(-24345,24345)); + CALL_SUBTEST(check_in_range(-long_ref,long_ref)); + } + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+char_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-char_offset,-11)); + CALL_SUBTEST(check_all_in_range(-126,-126+char_offset)); + CALL_SUBTEST(check_all_in_range(126-char_offset,126)); + CALL_SUBTEST(check_all_in_range(-126,126)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+short_offset)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-short_offset,-11)); + CALL_SUBTEST(check_all_in_range(-24345,-24345+short_offset)); + CALL_SUBTEST(check_all_in_range(24345,24345+short_offset)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); + CALL_SUBTEST(check_all_in_range(-673456,-673456+g_repeat)); + CALL_SUBTEST(check_all_in_range(673456,673456+g_repeat)); + + CALL_SUBTEST(check_all_in_range(11,11)); + CALL_SUBTEST(check_all_in_range(11,11+g_repeat)); + CALL_SUBTEST(check_all_in_range(-5,5)); + CALL_SUBTEST(check_all_in_range(-11-g_repeat,-11)); + CALL_SUBTEST(check_all_in_range(-long_ref,-long_ref+g_repeat)); + CALL_SUBTEST(check_all_in_range( long_ref, long_ref+g_repeat)); + + CALL_SUBTEST(check_histogram(-5,5,11)); + int bins = 100; + 
CALL_SUBTEST(check_histogram(-3333,-3333+bins*(3333/bins)-1,bins)); + bins = 1000; + CALL_SUBTEST(check_histogram(-RAND_MAX+10,-RAND_MAX+10+bins*(RAND_MAX/bins)-1,bins)); + CALL_SUBTEST(check_histogram(-RAND_MAX+10,-int64(RAND_MAX)+10+bins*(2*int64(RAND_MAX)/bins)-1,bins)); +} diff --git a/thirdparty/eigen/test/real_qz.cpp b/thirdparty/eigen/test/real_qz.cpp new file mode 100644 index 000000000..99ac31235 --- /dev/null +++ b/thirdparty/eigen/test/real_qz.cpp @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Alexey Korepanov +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_RUNTIME_NO_MALLOC +#include "main.h" +#include +#include + +template void real_qz(const MatrixType& m) +{ + /* this test covers the following files: + RealQZ.h + */ + using std::abs; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index dim = m.cols(); + + MatrixType A = MatrixType::Random(dim,dim), + B = MatrixType::Random(dim,dim); + + + // Regression test for bug 985: Randomly set rows or columns to zero + Index k=internal::random(0, dim-1); + switch(internal::random(0,10)) { + case 0: + A.row(k).setZero(); break; + case 1: + A.col(k).setZero(); break; + case 2: + B.row(k).setZero(); break; + case 3: + B.col(k).setZero(); break; + default: + break; + } + + RealQZ qz(dim); + // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition + //Eigen::internal::set_is_malloc_allowed(false); + qz.compute(A,B); + //Eigen::internal::set_is_malloc_allowed(true); + + VERIFY_IS_EQUAL(qz.info(), Success); + // check for zeros + bool all_zeros = true; + for (Index i=0; i void matrixRedux(const MatrixType& m) +{ + typedef typename MatrixType::Index 
Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols); + + // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test + // failures if we underflow into denormals. Thus, we scale so that entries are close to 1. + MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1; + + VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); + VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), Scalar(float(rows*cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy + Scalar s(0), p(1), minc(numext::real(m1.coeff(0))), maxc(numext::real(m1.coeff(0))); + for(int j = 0; j < cols; j++) + for(int i = 0; i < rows; i++) + { + s += m1(i,j); + p *= m1_for_prod(i,j); + minc = (std::min)(numext::real(minc), numext::real(m1(i,j))); + maxc = (std::max)(numext::real(maxc), numext::real(m1(i,j))); + } + const Scalar mean = s/Scalar(RealScalar(rows*cols)); + + VERIFY_IS_APPROX(m1.sum(), s); + VERIFY_IS_APPROX(m1.mean(), mean); + VERIFY_IS_APPROX(m1_for_prod.prod(), p); + VERIFY_IS_APPROX(m1.real().minCoeff(), numext::real(minc)); + VERIFY_IS_APPROX(m1.real().maxCoeff(), numext::real(maxc)); + + // test slice vectorization assuming assign is ok + Index r0 = internal::random(0,rows-1); + Index c0 = internal::random(0,cols-1); + Index r1 = internal::random(r0+1,rows)-r0; + Index c1 = internal::random(c0+1,cols)-c0; + VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).sum(), m1.block(r0,c0,r1,c1).eval().sum()); + VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).mean(), m1.block(r0,c0,r1,c1).eval().mean()); + VERIFY_IS_APPROX(m1_for_prod.block(r0,c0,r1,c1).prod(), m1_for_prod.block(r0,c0,r1,c1).eval().prod()); + VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().minCoeff(), m1.block(r0,c0,r1,c1).real().eval().minCoeff()); + 
VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().maxCoeff(), m1.block(r0,c0,r1,c1).real().eval().maxCoeff()); + + // regression for bug 1090 + const int R1 = MatrixType::RowsAtCompileTime>=2 ? MatrixType::RowsAtCompileTime/2 : 6; + const int C1 = MatrixType::ColsAtCompileTime>=2 ? MatrixType::ColsAtCompileTime/2 : 6; + if(R1<=rows-r0 && C1<=cols-c0) + { + VERIFY_IS_APPROX( (m1.template block(r0,c0).sum()), m1.block(r0,c0,R1,C1).sum() ); + } + + // test empty objects + VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0)); + VERIFY_IS_APPROX(m1.block(r0,c0,0,0).prod(), Scalar(1)); + + // test nesting complex expression + VERIFY_EVALUATION_COUNT( (m1.matrix()*m1.matrix().transpose()).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); + Matrix m2(rows,rows); + m2.setRandom(); + VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(), (MatrixType::SizeAtCompileTime==Dynamic ? 1 : 0) ); +} + +template void vectorRedux(const VectorType& w) +{ + using std::abs; + typedef typename VectorType::Index Index; + typedef typename VectorType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + Index size = w.size(); + + VectorType v = VectorType::Random(size); + VectorType v_for_prod = VectorType::Ones(size) + Scalar(0.2) * v; // see comment above declaration of m1_for_prod + + for(int i = 1; i < size; i++) + { + Scalar s(0), p(1); + RealScalar minc(numext::real(v.coeff(0))), maxc(numext::real(v.coeff(0))); + for(int j = 0; j < i; j++) + { + s += v[j]; + p *= v_for_prod[j]; + minc = (std::min)(minc, numext::real(v[j])); + maxc = (std::max)(maxc, numext::real(v[j])); + } + VERIFY_IS_MUCH_SMALLER_THAN(abs(s - v.head(i).sum()), Scalar(1)); + VERIFY_IS_APPROX(p, v_for_prod.head(i).prod()); + VERIFY_IS_APPROX(minc, v.real().head(i).minCoeff()); + VERIFY_IS_APPROX(maxc, v.real().head(i).maxCoeff()); + } + + for(int i = 0; i < size-1; i++) + { + Scalar s(0), p(1); + RealScalar minc(numext::real(v.coeff(i))), maxc(numext::real(v.coeff(i))); + for(int j 
= i; j < size; j++) + { + s += v[j]; + p *= v_for_prod[j]; + minc = (std::min)(minc, numext::real(v[j])); + maxc = (std::max)(maxc, numext::real(v[j])); + } + VERIFY_IS_MUCH_SMALLER_THAN(abs(s - v.tail(size-i).sum()), Scalar(1)); + VERIFY_IS_APPROX(p, v_for_prod.tail(size-i).prod()); + VERIFY_IS_APPROX(minc, v.real().tail(size-i).minCoeff()); + VERIFY_IS_APPROX(maxc, v.real().tail(size-i).maxCoeff()); + } + + for(int i = 0; i < size/2; i++) + { + Scalar s(0), p(1); + RealScalar minc(numext::real(v.coeff(i))), maxc(numext::real(v.coeff(i))); + for(int j = i; j < size-i; j++) + { + s += v[j]; + p *= v_for_prod[j]; + minc = (std::min)(minc, numext::real(v[j])); + maxc = (std::max)(maxc, numext::real(v[j])); + } + VERIFY_IS_MUCH_SMALLER_THAN(abs(s - v.segment(i, size-2*i).sum()), Scalar(1)); + VERIFY_IS_APPROX(p, v_for_prod.segment(i, size-2*i).prod()); + VERIFY_IS_APPROX(minc, v.real().segment(i, size-2*i).minCoeff()); + VERIFY_IS_APPROX(maxc, v.real().segment(i, size-2*i).maxCoeff()); + } + + // test empty objects + VERIFY_IS_APPROX(v.head(0).sum(), Scalar(0)); + VERIFY_IS_APPROX(v.tail(0).prod(), Scalar(1)); + VERIFY_RAISES_ASSERT(v.head(0).mean()); + VERIFY_RAISES_ASSERT(v.head(0).minCoeff()); + VERIFY_RAISES_ASSERT(v.head(0).maxCoeff()); +} + +void test_redux() +{ + // the max size cannot be too large, otherwise reduxion operations obviously generate large errors. 
+ int maxsize = (std::min)(100,EIGEN_TEST_MAX_SIZE); + TEST_SET_BUT_UNUSED_VARIABLE(maxsize); + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( matrixRedux(Matrix()) ); + CALL_SUBTEST_1( matrixRedux(Array()) ); + CALL_SUBTEST_2( matrixRedux(Matrix2f()) ); + CALL_SUBTEST_2( matrixRedux(Array2f()) ); + CALL_SUBTEST_3( matrixRedux(Matrix4d()) ); + CALL_SUBTEST_3( matrixRedux(Array4d()) ); + CALL_SUBTEST_4( matrixRedux(MatrixXcf(internal::random(1,maxsize), internal::random(1,maxsize))) ); + CALL_SUBTEST_4( matrixRedux(ArrayXXcf(internal::random(1,maxsize), internal::random(1,maxsize))) ); + CALL_SUBTEST_5( matrixRedux(MatrixXd (internal::random(1,maxsize), internal::random(1,maxsize))) ); + CALL_SUBTEST_5( matrixRedux(ArrayXXd (internal::random(1,maxsize), internal::random(1,maxsize))) ); + CALL_SUBTEST_6( matrixRedux(MatrixXi (internal::random(1,maxsize), internal::random(1,maxsize))) ); + CALL_SUBTEST_6( matrixRedux(ArrayXXi (internal::random(1,maxsize), internal::random(1,maxsize))) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_7( vectorRedux(Vector4f()) ); + CALL_SUBTEST_7( vectorRedux(Array4f()) ); + CALL_SUBTEST_5( vectorRedux(VectorXd(internal::random(1,maxsize))) ); + CALL_SUBTEST_5( vectorRedux(ArrayXd(internal::random(1,maxsize))) ); + CALL_SUBTEST_8( vectorRedux(VectorXf(internal::random(1,maxsize))) ); + CALL_SUBTEST_8( vectorRedux(ArrayXf(internal::random(1,maxsize))) ); + } +} diff --git a/thirdparty/eigen/test/ref.cpp b/thirdparty/eigen/test/ref.cpp new file mode 100644 index 000000000..769db0414 --- /dev/null +++ b/thirdparty/eigen/test/ref.cpp @@ -0,0 +1,280 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +// This unit test cannot be easily written to work with EIGEN_DEFAULT_TO_ROW_MAJOR +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif + +#define TEST_ENABLE_TEMPORARY_TRACKING + +#include "main.h" + +// test Ref.h + +// Deal with i387 extended precision +#if EIGEN_ARCH_i386 && !(EIGEN_ARCH_x86_64) + +#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(4,4) +#pragma GCC optimize ("-ffloat-store") +#else +#undef VERIFY_IS_EQUAL +#define VERIFY_IS_EQUAL(X,Y) VERIFY_IS_APPROX(X,Y) +#endif + +#endif + +template void ref_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix DynMatrixType; + typedef Matrix RealDynMatrixType; + + typedef Ref RefMat; + typedef Ref RefDynMat; + typedef Ref ConstRefDynMat; + typedef Ref > RefRealMatWithStride; + + Index rows = m.rows(), cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = m1; + + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + Index brows = internal::random(1,rows-i); + Index bcols = internal::random(1,cols-j); + + RefMat rm0 = m1; + VERIFY_IS_EQUAL(rm0, m1); + RefDynMat rm1 = m1; + VERIFY_IS_EQUAL(rm1, m1); + RefDynMat rm2 = m1.block(i,j,brows,bcols); + VERIFY_IS_EQUAL(rm2, m1.block(i,j,brows,bcols)); + rm2.setOnes(); + m2.block(i,j,brows,bcols).setOnes(); + VERIFY_IS_EQUAL(m1, m2); + + m2.block(i,j,brows,bcols).setRandom(); + rm2 = m2.block(i,j,brows,bcols); + VERIFY_IS_EQUAL(m1, m2); + + ConstRefDynMat rm3 = m1.block(i,j,brows,bcols); + m1.block(i,j,brows,bcols) *= 2; + m2.block(i,j,brows,bcols) *= 2; + VERIFY_IS_EQUAL(rm3, m2.block(i,j,brows,bcols)); + RefRealMatWithStride rm4 = m1.real(); + VERIFY_IS_EQUAL(rm4, m2.real()); + rm4.array() += 1; + m2.real().array() += 1; + VERIFY_IS_EQUAL(m1, m2); +} + +template void ref_vector(const VectorType& m) +{ + typedef typename VectorType::Index Index; + typedef 
typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; + typedef Matrix DynMatrixType; + typedef Matrix MatrixType; + typedef Matrix RealDynMatrixType; + + typedef Ref RefMat; + typedef Ref RefDynMat; + typedef Ref ConstRefDynMat; + typedef Ref > RefRealMatWithStride; + typedef Ref > RefMatWithStride; + + Index size = m.size(); + + VectorType v1 = VectorType::Random(size), + v2 = v1; + MatrixType mat1 = MatrixType::Random(size,size), + mat2 = mat1, + mat3 = MatrixType::Random(size,size); + + Index i = internal::random(0,size-1); + Index bsize = internal::random(1,size-i); + + RefMat rm0 = v1; + VERIFY_IS_EQUAL(rm0, v1); + RefDynMat rv1 = v1; + VERIFY_IS_EQUAL(rv1, v1); + RefDynMat rv2 = v1.segment(i,bsize); + VERIFY_IS_EQUAL(rv2, v1.segment(i,bsize)); + rv2.setOnes(); + v2.segment(i,bsize).setOnes(); + VERIFY_IS_EQUAL(v1, v2); + + v2.segment(i,bsize).setRandom(); + rv2 = v2.segment(i,bsize); + VERIFY_IS_EQUAL(v1, v2); + + ConstRefDynMat rm3 = v1.segment(i,bsize); + v1.segment(i,bsize) *= 2; + v2.segment(i,bsize) *= 2; + VERIFY_IS_EQUAL(rm3, v2.segment(i,bsize)); + + RefRealMatWithStride rm4 = v1.real(); + VERIFY_IS_EQUAL(rm4, v2.real()); + rm4.array() += 1; + v2.real().array() += 1; + VERIFY_IS_EQUAL(v1, v2); + + RefMatWithStride rm5 = mat1.row(i).transpose(); + VERIFY_IS_EQUAL(rm5, mat1.row(i).transpose()); + rm5.array() += 1; + mat2.row(i).array() += 1; + VERIFY_IS_EQUAL(mat1, mat2); + rm5.noalias() = rm4.transpose() * mat3; + mat2.row(i) = v2.real().transpose() * mat3; + VERIFY_IS_APPROX(mat1, mat2); +} + +template void check_const_correctness(const PlainObjectType&) +{ + // verify that ref-to-const don't have LvalueBit + typedef typename internal::add_const::type ConstPlainObjectType; + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(Ref::Flags & LvalueBit) ); + VERIFY( !(Ref::Flags & LvalueBit) ); +} + +template +EIGEN_DONT_INLINE void call_ref_1(Ref a, 
const B &b) { VERIFY_IS_EQUAL(a,b); } +template +EIGEN_DONT_INLINE void call_ref_2(const Ref& a, const B &b) { VERIFY_IS_EQUAL(a,b); } +template +EIGEN_DONT_INLINE void call_ref_3(Ref > a, const B &b) { VERIFY_IS_EQUAL(a,b); } +template +EIGEN_DONT_INLINE void call_ref_4(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a,b); } +template +EIGEN_DONT_INLINE void call_ref_5(Ref > a, const B &b) { VERIFY_IS_EQUAL(a,b); } +template +EIGEN_DONT_INLINE void call_ref_6(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a,b); } +template +EIGEN_DONT_INLINE void call_ref_7(Ref > a, const B &b) { VERIFY_IS_EQUAL(a,b); } + +void call_ref() +{ + VectorXcf ca = VectorXcf::Random(10); + VectorXf a = VectorXf::Random(10); + RowVectorXf b = RowVectorXf::Random(10); + MatrixXf A = MatrixXf::Random(10,10); + RowVector3f c = RowVector3f::Random(); + const VectorXf& ac(a); + VectorBlock ab(a,0,3); + const VectorBlock abc(a,0,3); + + + VERIFY_EVALUATION_COUNT( call_ref_1(a,a), 0); + VERIFY_EVALUATION_COUNT( call_ref_1(b,b.transpose()), 0); +// call_ref_1(ac,a RowMatrixXd; +int test_ref_overload_fun1(Ref ) { return 1; } +int test_ref_overload_fun1(Ref ) { return 2; } +int test_ref_overload_fun1(Ref ) { return 3; } + +int test_ref_overload_fun2(Ref ) { return 4; } +int test_ref_overload_fun2(Ref ) { return 5; } + +void test_ref_ambiguous(const Ref &A, Ref B) +{ + B = A; + B = A - A; +} + +// See also bug 969 +void test_ref_overloads() +{ + MatrixXd Ad, Bd; + RowMatrixXd rAd, rBd; + VERIFY( test_ref_overload_fun1(Ad)==1 ); + VERIFY( test_ref_overload_fun1(rAd)==2 ); + + MatrixXf Af, Bf; + VERIFY( test_ref_overload_fun2(Ad)==4 ); + VERIFY( test_ref_overload_fun2(Ad+Bd)==4 ); + VERIFY( test_ref_overload_fun2(Af+Bf)==5 ); + + ArrayXd A, B; + test_ref_ambiguous(A, B); +} + +void test_ref() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( ref_vector(Matrix()) ); + CALL_SUBTEST_1( check_const_correctness(Matrix()) ); + CALL_SUBTEST_2( ref_vector(Vector4d()) ); + CALL_SUBTEST_2( 
check_const_correctness(Matrix4d()) ); + CALL_SUBTEST_3( ref_vector(Vector4cf()) ); + CALL_SUBTEST_4( ref_vector(VectorXcf(8)) ); + CALL_SUBTEST_5( ref_vector(VectorXi(12)) ); + CALL_SUBTEST_5( check_const_correctness(VectorXi(12)) ); + + CALL_SUBTEST_1( ref_matrix(Matrix()) ); + CALL_SUBTEST_2( ref_matrix(Matrix4d()) ); + CALL_SUBTEST_1( ref_matrix(Matrix()) ); + CALL_SUBTEST_4( ref_matrix(MatrixXcf(internal::random(1,10),internal::random(1,10))) ); + CALL_SUBTEST_4( ref_matrix(Matrix,10,15>()) ); + CALL_SUBTEST_5( ref_matrix(MatrixXi(internal::random(1,10),internal::random(1,10))) ); + CALL_SUBTEST_6( call_ref() ); + } + + CALL_SUBTEST_7( test_ref_overloads() ); +} diff --git a/thirdparty/eigen/test/resize.cpp b/thirdparty/eigen/test/resize.cpp new file mode 100644 index 000000000..4adaafe56 --- /dev/null +++ b/thirdparty/eigen/test/resize.cpp @@ -0,0 +1,41 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Keir Mierle +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template +void resizeLikeTest() +{ + MatrixXf A(rows, cols); + MatrixXf B; + Matrix C; + B.resizeLike(A); + C.resizeLike(B); // Shouldn't crash. 
+ VERIFY(B.rows() == rows && B.cols() == cols); + + VectorXf x(rows); + RowVectorXf y; + y.resizeLike(x); + VERIFY(y.rows() == 1 && y.cols() == rows); + + y.resize(cols); + x.resizeLike(y); + VERIFY(x.rows() == cols && x.cols() == 1); +} + +void resizeLikeTest12() { resizeLikeTest<1,2>(); } +void resizeLikeTest1020() { resizeLikeTest<10,20>(); } +void resizeLikeTest31() { resizeLikeTest<3,1>(); } + +void test_resize() +{ + CALL_SUBTEST(resizeLikeTest12() ); + CALL_SUBTEST(resizeLikeTest1020() ); + CALL_SUBTEST(resizeLikeTest31() ); +} diff --git a/thirdparty/eigen/test/rvalue_types.cpp b/thirdparty/eigen/test/rvalue_types.cpp new file mode 100644 index 000000000..8887f1b1b --- /dev/null +++ b/thirdparty/eigen/test/rvalue_types.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include + +using internal::UIntPtr; + +#if EIGEN_HAS_RVALUE_REFERENCES +template +void rvalue_copyassign(const MatrixType& m) +{ + + typedef typename internal::traits::Scalar Scalar; + + // create a temporary which we are about to destroy by moving + MatrixType tmp = m; + UIntPtr src_address = reinterpret_cast(tmp.data()); + + // move the temporary to n + MatrixType n = std::move(tmp); + UIntPtr dst_address = reinterpret_cast(n.data()); + + if (MatrixType::RowsAtCompileTime==Dynamic|| MatrixType::ColsAtCompileTime==Dynamic) + { + // verify that we actually moved the guts + VERIFY_IS_EQUAL(src_address, dst_address); + } + + // verify that the content did not change + Scalar abs_diff = (m-n).array().abs().sum(); + VERIFY_IS_EQUAL(abs_diff, Scalar(0)); +} +#else +template +void rvalue_copyassign(const MatrixType&) {} +#endif + +void test_rvalue_types() +{ + CALL_SUBTEST_1(rvalue_copyassign( MatrixXf::Random(50,50).eval() )); + CALL_SUBTEST_1(rvalue_copyassign( ArrayXXf::Random(50,50).eval() )); + + CALL_SUBTEST_1(rvalue_copyassign( Matrix::Random(50).eval() )); + CALL_SUBTEST_1(rvalue_copyassign( Array::Random(50).eval() )); + + CALL_SUBTEST_1(rvalue_copyassign( Matrix::Random(50).eval() )); + CALL_SUBTEST_1(rvalue_copyassign( Array::Random(50).eval() )); + + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); + + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); +} diff --git a/thirdparty/eigen/test/schur_complex.cpp b/thirdparty/eigen/test/schur_complex.cpp new file mode 100644 index 000000000..deb78e44e --- /dev/null +++ b/thirdparty/eigen/test/schur_complex.cpp @@ -0,0 +1,91 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2010,2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include + +template void schur(int size = MatrixType::ColsAtCompileTime) +{ + typedef typename ComplexSchur::ComplexScalar ComplexScalar; + typedef typename ComplexSchur::ComplexMatrixType ComplexMatrixType; + + // Test basic functionality: T is triangular and A = U T U* + for(int counter = 0; counter < g_repeat; ++counter) { + MatrixType A = MatrixType::Random(size, size); + ComplexSchur schurOfA(A); + VERIFY_IS_EQUAL(schurOfA.info(), Success); + ComplexMatrixType U = schurOfA.matrixU(); + ComplexMatrixType T = schurOfA.matrixT(); + for(int row = 1; row < size; ++row) { + for(int col = 0; col < row; ++col) { + VERIFY(T(row,col) == (typename MatrixType::Scalar)0); + } + } + VERIFY_IS_APPROX(A.template cast(), U * T * U.adjoint()); + } + + // Test asserts when not initialized + ComplexSchur csUninitialized; + VERIFY_RAISES_ASSERT(csUninitialized.matrixT()); + VERIFY_RAISES_ASSERT(csUninitialized.matrixU()); + VERIFY_RAISES_ASSERT(csUninitialized.info()); + + // Test whether compute() and constructor returns same result + MatrixType A = MatrixType::Random(size, size); + ComplexSchur cs1; + cs1.compute(A); + ComplexSchur cs2(A); + VERIFY_IS_EQUAL(cs1.info(), Success); + VERIFY_IS_EQUAL(cs2.info(), Success); + VERIFY_IS_EQUAL(cs1.matrixT(), cs2.matrixT()); + VERIFY_IS_EQUAL(cs1.matrixU(), cs2.matrixU()); + + // Test maximum number of iterations + ComplexSchur cs3; + cs3.setMaxIterations(ComplexSchur::m_maxIterationsPerRow * size).compute(A); + VERIFY_IS_EQUAL(cs3.info(), Success); + VERIFY_IS_EQUAL(cs3.matrixT(), cs1.matrixT()); + VERIFY_IS_EQUAL(cs3.matrixU(), cs1.matrixU()); + cs3.setMaxIterations(1).compute(A); + VERIFY_IS_EQUAL(cs3.info(), size > 1 ? 
NoConvergence : Success); + VERIFY_IS_EQUAL(cs3.getMaxIterations(), 1); + + MatrixType Atriangular = A; + Atriangular.template triangularView().setZero(); + cs3.setMaxIterations(1).compute(Atriangular); // triangular matrices do not need any iterations + VERIFY_IS_EQUAL(cs3.info(), Success); + VERIFY_IS_EQUAL(cs3.matrixT(), Atriangular.template cast()); + VERIFY_IS_EQUAL(cs3.matrixU(), ComplexMatrixType::Identity(size, size)); + + // Test computation of only T, not U + ComplexSchur csOnlyT(A, false); + VERIFY_IS_EQUAL(csOnlyT.info(), Success); + VERIFY_IS_EQUAL(cs1.matrixT(), csOnlyT.matrixT()); + VERIFY_RAISES_ASSERT(csOnlyT.matrixU()); + + if (size > 1 && size < 20) + { + // Test matrix with NaN + A(0,0) = std::numeric_limits::quiet_NaN(); + ComplexSchur csNaN(A); + VERIFY_IS_EQUAL(csNaN.info(), NoConvergence); + } +} + +void test_schur_complex() +{ + CALL_SUBTEST_1(( schur() )); + CALL_SUBTEST_2(( schur(internal::random(1,EIGEN_TEST_MAX_SIZE/4)) )); + CALL_SUBTEST_3(( schur, 1, 1> >() )); + CALL_SUBTEST_4(( schur >() )); + + // Test problem size constructors + CALL_SUBTEST_5(ComplexSchur(10)); +} diff --git a/thirdparty/eigen/test/schur_real.cpp b/thirdparty/eigen/test/schur_real.cpp new file mode 100644 index 000000000..4aede87df --- /dev/null +++ b/thirdparty/eigen/test/schur_real.cpp @@ -0,0 +1,112 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010,2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include + +template void verifyIsQuasiTriangular(const MatrixType& T) +{ + typedef typename MatrixType::Index Index; + + const Index size = T.cols(); + typedef typename MatrixType::Scalar Scalar; + + // Check T is lower Hessenberg + for(int row = 2; row < size; ++row) { + for(int col = 0; col < row - 1; ++col) { + VERIFY(T(row,col) == Scalar(0)); + } + } + + // Check that any non-zero on the subdiagonal is followed by a zero and is + // part of a 2x2 diagonal block with imaginary eigenvalues. + for(int row = 1; row < size; ++row) { + if (T(row,row-1) != Scalar(0)) { + VERIFY(row == size-1 || T(row+1,row) == 0); + Scalar tr = T(row-1,row-1) + T(row,row); + Scalar det = T(row-1,row-1) * T(row,row) - T(row-1,row) * T(row,row-1); + VERIFY(4 * det > tr * tr); + } + } +} + +template void schur(int size = MatrixType::ColsAtCompileTime) +{ + // Test basic functionality: T is quasi-triangular and A = U T U* + for(int counter = 0; counter < g_repeat; ++counter) { + MatrixType A = MatrixType::Random(size, size); + RealSchur schurOfA(A); + VERIFY_IS_EQUAL(schurOfA.info(), Success); + MatrixType U = schurOfA.matrixU(); + MatrixType T = schurOfA.matrixT(); + verifyIsQuasiTriangular(T); + VERIFY_IS_APPROX(A, U * T * U.transpose()); + } + + // Test asserts when not initialized + RealSchur rsUninitialized; + VERIFY_RAISES_ASSERT(rsUninitialized.matrixT()); + VERIFY_RAISES_ASSERT(rsUninitialized.matrixU()); + VERIFY_RAISES_ASSERT(rsUninitialized.info()); + + // Test whether compute() and constructor returns same result + MatrixType A = MatrixType::Random(size, size); + RealSchur rs1; + rs1.compute(A); + RealSchur rs2(A); + VERIFY_IS_EQUAL(rs1.info(), Success); + VERIFY_IS_EQUAL(rs2.info(), Success); + VERIFY_IS_EQUAL(rs1.matrixT(), rs2.matrixT()); + VERIFY_IS_EQUAL(rs1.matrixU(), rs2.matrixU()); + + // Test maximum number of iterations + RealSchur rs3; + rs3.setMaxIterations(RealSchur::m_maxIterationsPerRow * size).compute(A); + 
VERIFY_IS_EQUAL(rs3.info(), Success); + VERIFY_IS_EQUAL(rs3.matrixT(), rs1.matrixT()); + VERIFY_IS_EQUAL(rs3.matrixU(), rs1.matrixU()); + if (size > 2) { + rs3.setMaxIterations(1).compute(A); + VERIFY_IS_EQUAL(rs3.info(), NoConvergence); + VERIFY_IS_EQUAL(rs3.getMaxIterations(), 1); + } + + MatrixType Atriangular = A; + Atriangular.template triangularView().setZero(); + rs3.setMaxIterations(1).compute(Atriangular); // triangular matrices do not need any iterations + VERIFY_IS_EQUAL(rs3.info(), Success); + VERIFY_IS_APPROX(rs3.matrixT(), Atriangular); // approx because of scaling... + VERIFY_IS_EQUAL(rs3.matrixU(), MatrixType::Identity(size, size)); + + // Test computation of only T, not U + RealSchur rsOnlyT(A, false); + VERIFY_IS_EQUAL(rsOnlyT.info(), Success); + VERIFY_IS_EQUAL(rs1.matrixT(), rsOnlyT.matrixT()); + VERIFY_RAISES_ASSERT(rsOnlyT.matrixU()); + + if (size > 2 && size < 20) + { + // Test matrix with NaN + A(0,0) = std::numeric_limits::quiet_NaN(); + RealSchur rsNaN(A); + VERIFY_IS_EQUAL(rsNaN.info(), NoConvergence); + } +} + +void test_schur_real() +{ + CALL_SUBTEST_1(( schur() )); + CALL_SUBTEST_2(( schur(internal::random(1,EIGEN_TEST_MAX_SIZE/4)) )); + CALL_SUBTEST_3(( schur >() )); + CALL_SUBTEST_4(( schur >() )); + + // Test problem size constructors + CALL_SUBTEST_5(RealSchur(10)); +} diff --git a/thirdparty/eigen/test/selfadjoint.cpp b/thirdparty/eigen/test/selfadjoint.cpp new file mode 100644 index 000000000..92401e506 --- /dev/null +++ b/thirdparty/eigen/test/selfadjoint.cpp @@ -0,0 +1,72 @@ +// This file is triangularView of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +// This file tests the basic selfadjointView API, +// the related products and decompositions are tested in specific files. + +template void selfadjoint(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + m4(rows, cols); + + m1.diagonal() = m1.diagonal().real().template cast(); + + // check selfadjoint to dense + m3 = m1.template selfadjointView(); + VERIFY_IS_APPROX(MatrixType(m3.template triangularView()), MatrixType(m1.template triangularView())); + VERIFY_IS_APPROX(m3, m3.adjoint()); + + m3 = m1.template selfadjointView(); + VERIFY_IS_APPROX(MatrixType(m3.template triangularView()), MatrixType(m1.template triangularView())); + VERIFY_IS_APPROX(m3, m3.adjoint()); + + m3 = m1.template selfadjointView(); + m4 = m2; + m4 += m1.template selfadjointView(); + VERIFY_IS_APPROX(m4, m2+m3); + + m3 = m1.template selfadjointView(); + m4 = m2; + m4 -= m1.template selfadjointView(); + VERIFY_IS_APPROX(m4, m2-m3); +} + +void bug_159() +{ + Matrix3d m = Matrix3d::Random().selfadjointView(); + EIGEN_UNUSED_VARIABLE(m) +} + +void test_selfadjoint() +{ + for(int i = 0; i < g_repeat ; i++) + { + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); + + CALL_SUBTEST_1( selfadjoint(Matrix()) ); + CALL_SUBTEST_2( selfadjoint(Matrix()) ); + CALL_SUBTEST_3( selfadjoint(Matrix3cf()) ); + CALL_SUBTEST_4( selfadjoint(MatrixXcd(s,s)) ); + CALL_SUBTEST_5( selfadjoint(Matrix(s, s)) ); + + TEST_SET_BUT_UNUSED_VARIABLE(s) + } + + CALL_SUBTEST_1( bug_159() ); +} diff --git a/thirdparty/eigen/test/simplicial_cholesky.cpp b/thirdparty/eigen/test/simplicial_cholesky.cpp new file mode 100644 index 000000000..649c817b4 --- /dev/null +++ b/thirdparty/eigen/test/simplicial_cholesky.cpp @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template 
library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" + +template void test_simplicial_cholesky_T() +{ + typedef SparseMatrix SparseMatrixType; + SimplicialCholesky chol_colmajor_lower_amd; + SimplicialCholesky chol_colmajor_upper_amd; + SimplicialLLT< SparseMatrixType, Lower> llt_colmajor_lower_amd; + SimplicialLLT< SparseMatrixType, Upper> llt_colmajor_upper_amd; + SimplicialLDLT< SparseMatrixType, Lower> ldlt_colmajor_lower_amd; + SimplicialLDLT< SparseMatrixType, Upper> ldlt_colmajor_upper_amd; + SimplicialLDLT< SparseMatrixType, Lower, NaturalOrdering > ldlt_colmajor_lower_nat; + SimplicialLDLT< SparseMatrixType, Upper, NaturalOrdering > ldlt_colmajor_upper_nat; + + check_sparse_spd_solving(chol_colmajor_lower_amd); + check_sparse_spd_solving(chol_colmajor_upper_amd); + check_sparse_spd_solving(llt_colmajor_lower_amd); + check_sparse_spd_solving(llt_colmajor_upper_amd); + check_sparse_spd_solving(ldlt_colmajor_lower_amd); + check_sparse_spd_solving(ldlt_colmajor_upper_amd); + + check_sparse_spd_determinant(chol_colmajor_lower_amd); + check_sparse_spd_determinant(chol_colmajor_upper_amd); + check_sparse_spd_determinant(llt_colmajor_lower_amd); + check_sparse_spd_determinant(llt_colmajor_upper_amd); + check_sparse_spd_determinant(ldlt_colmajor_lower_amd); + check_sparse_spd_determinant(ldlt_colmajor_upper_amd); + + check_sparse_spd_solving(ldlt_colmajor_lower_nat, 300, 1000); + check_sparse_spd_solving(ldlt_colmajor_upper_nat, 300, 1000); +} + +void test_simplicial_cholesky() +{ + CALL_SUBTEST_1(( test_simplicial_cholesky_T() )); + CALL_SUBTEST_2(( test_simplicial_cholesky_T, int>() )); + CALL_SUBTEST_3(( test_simplicial_cholesky_T() )); +} diff --git a/thirdparty/eigen/test/sizeof.cpp 
b/thirdparty/eigen/test/sizeof.cpp new file mode 100644 index 000000000..03ad20453 --- /dev/null +++ b/thirdparty/eigen/test/sizeof.cpp @@ -0,0 +1,47 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template void verifySizeOf(const MatrixType&) +{ + typedef typename MatrixType::Scalar Scalar; + if (MatrixType::RowsAtCompileTime!=Dynamic && MatrixType::ColsAtCompileTime!=Dynamic) + VERIFY_IS_EQUAL(std::ptrdiff_t(sizeof(MatrixType)),std::ptrdiff_t(sizeof(Scalar))*std::ptrdiff_t(MatrixType::SizeAtCompileTime)); + else + VERIFY_IS_EQUAL(sizeof(MatrixType),sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index)); +} + +void test_sizeof() +{ + CALL_SUBTEST(verifySizeOf(Matrix()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Vector2d()) ); + CALL_SUBTEST(verifySizeOf(Vector4f()) ); + CALL_SUBTEST(verifySizeOf(Matrix4d()) ); + CALL_SUBTEST(verifySizeOf(Matrix()) ); + CALL_SUBTEST(verifySizeOf(Matrix()) ); + CALL_SUBTEST(verifySizeOf(MatrixXcf(3, 3)) ); + CALL_SUBTEST(verifySizeOf(MatrixXi(8, 12)) ); + CALL_SUBTEST(verifySizeOf(MatrixXcd(20, 20)) ); + CALL_SUBTEST(verifySizeOf(Matrix()) ); + + VERIFY(sizeof(std::complex) == 2*sizeof(float)); + VERIFY(sizeof(std::complex) == 2*sizeof(double)); +} diff --git 
a/thirdparty/eigen/test/sizeoverflow.cpp b/thirdparty/eigen/test/sizeoverflow.cpp new file mode 100644 index 000000000..240d22294 --- /dev/null +++ b/thirdparty/eigen/test/sizeoverflow.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#define VERIFY_THROWS_BADALLOC(a) { \ + bool threw = false; \ + try { \ + a; \ + } \ + catch (std::bad_alloc&) { threw = true; } \ + VERIFY(threw && "should have thrown bad_alloc: " #a); \ + } + +template +void triggerMatrixBadAlloc(Index rows, Index cols) +{ + VERIFY_THROWS_BADALLOC( MatrixType m(rows, cols) ); + VERIFY_THROWS_BADALLOC( MatrixType m; m.resize(rows, cols) ); + VERIFY_THROWS_BADALLOC( MatrixType m; m.conservativeResize(rows, cols) ); +} + +template +void triggerVectorBadAlloc(Index size) +{ + VERIFY_THROWS_BADALLOC( VectorType v(size) ); + VERIFY_THROWS_BADALLOC( VectorType v; v.resize(size) ); + VERIFY_THROWS_BADALLOC( VectorType v; v.conservativeResize(size) ); +} + +void test_sizeoverflow() +{ + // there are 2 levels of overflow checking. first in PlainObjectBase.h we check for overflow in rows*cols computations. + // this is tested in tests of the form times_itself_gives_0 * times_itself_gives_0 + // Then in Memory.h we check for overflow in size * sizeof(T) computations. 
+ // this is tested in tests of the form times_4_gives_0 * sizeof(float) + + size_t times_itself_gives_0 = size_t(1) << (8 * sizeof(Index) / 2); + VERIFY(times_itself_gives_0 * times_itself_gives_0 == 0); + + size_t times_4_gives_0 = size_t(1) << (8 * sizeof(Index) - 2); + VERIFY(times_4_gives_0 * 4 == 0); + + size_t times_8_gives_0 = size_t(1) << (8 * sizeof(Index) - 3); + VERIFY(times_8_gives_0 * 8 == 0); + + triggerMatrixBadAlloc(times_itself_gives_0, times_itself_gives_0); + triggerMatrixBadAlloc(times_itself_gives_0 / 4, times_itself_gives_0); + triggerMatrixBadAlloc(times_4_gives_0, 1); + + triggerMatrixBadAlloc(times_itself_gives_0, times_itself_gives_0); + triggerMatrixBadAlloc(times_itself_gives_0 / 8, times_itself_gives_0); + triggerMatrixBadAlloc(times_8_gives_0, 1); + + triggerVectorBadAlloc(times_4_gives_0); + + triggerVectorBadAlloc(times_8_gives_0); +} diff --git a/thirdparty/eigen/test/smallvectors.cpp b/thirdparty/eigen/test/smallvectors.cpp new file mode 100644 index 000000000..781511397 --- /dev/null +++ b/thirdparty/eigen/test/smallvectors.cpp @@ -0,0 +1,67 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_STATIC_ASSERT +#include "main.h" + +template void smallVectors() +{ + typedef Matrix V2; + typedef Matrix V3; + typedef Matrix V4; + typedef Matrix VX; + Scalar x1 = internal::random(), + x2 = internal::random(), + x3 = internal::random(), + x4 = internal::random(); + V2 v2(x1, x2); + V3 v3(x1, x2, x3); + V4 v4(x1, x2, x3, x4); + VERIFY_IS_APPROX(x1, v2.x()); + VERIFY_IS_APPROX(x1, v3.x()); + VERIFY_IS_APPROX(x1, v4.x()); + VERIFY_IS_APPROX(x2, v2.y()); + VERIFY_IS_APPROX(x2, v3.y()); + VERIFY_IS_APPROX(x2, v4.y()); + VERIFY_IS_APPROX(x3, v3.z()); + VERIFY_IS_APPROX(x3, v4.z()); + VERIFY_IS_APPROX(x4, v4.w()); + + if (!NumTraits::IsInteger) + { + VERIFY_RAISES_ASSERT(V3(2, 1)) + VERIFY_RAISES_ASSERT(V3(3, 2)) + VERIFY_RAISES_ASSERT(V3(Scalar(3), 1)) + VERIFY_RAISES_ASSERT(V3(3, Scalar(1))) + VERIFY_RAISES_ASSERT(V3(Scalar(3), Scalar(1))) + VERIFY_RAISES_ASSERT(V3(Scalar(123), Scalar(123))) + + VERIFY_RAISES_ASSERT(V4(1, 3)) + VERIFY_RAISES_ASSERT(V4(2, 4)) + VERIFY_RAISES_ASSERT(V4(1, Scalar(4))) + VERIFY_RAISES_ASSERT(V4(Scalar(1), 4)) + VERIFY_RAISES_ASSERT(V4(Scalar(1), Scalar(4))) + VERIFY_RAISES_ASSERT(V4(Scalar(123), Scalar(123))) + + VERIFY_RAISES_ASSERT(VX(3, 2)) + VERIFY_RAISES_ASSERT(VX(Scalar(3), 1)) + VERIFY_RAISES_ASSERT(VX(3, Scalar(1))) + VERIFY_RAISES_ASSERT(VX(Scalar(3), Scalar(1))) + VERIFY_RAISES_ASSERT(VX(Scalar(123), Scalar(123))) + } +} + +void test_smallvectors() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST(smallVectors() ); + CALL_SUBTEST(smallVectors() ); + CALL_SUBTEST(smallVectors() ); + } +} diff --git a/thirdparty/eigen/test/sparse.h b/thirdparty/eigen/test/sparse.h new file mode 100644 index 000000000..9912e1e24 --- /dev/null +++ b/thirdparty/eigen/test/sparse.h @@ -0,0 +1,210 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 
2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TESTSPARSE_H +#define EIGEN_TESTSPARSE_H + +#define EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET + +#include "main.h" + +#if EIGEN_GNUC_AT_LEAST(4,0) && !defined __ICC && !defined(__clang__) + +#ifdef min +#undef min +#endif + +#ifdef max +#undef max +#endif + +#include +#define EIGEN_UNORDERED_MAP_SUPPORT +namespace std { + using std::tr1::unordered_map; +} +#endif + +#ifdef EIGEN_GOOGLEHASH_SUPPORT + #include +#endif + +#include +#include +#include + +enum { + ForceNonZeroDiag = 1, + MakeLowerTriangular = 2, + MakeUpperTriangular = 4, + ForceRealDiag = 8 +}; + +/* Initializes both a sparse and dense matrix with same random values, + * and a ratio of \a density non zero entries. + * \param flags is a union of ForceNonZeroDiag, MakeLowerTriangular and MakeUpperTriangular + * allowing to control the shape of the matrix. + * \param zeroCoords and nonzeroCoords allows to get the coordinate lists of the non zero, + * and zero coefficients respectively. + */ +template void +initSparse(double density, + Matrix& refMat, + SparseMatrix& sparseMat, + int flags = 0, + std::vector >* zeroCoords = 0, + std::vector >* nonzeroCoords = 0) +{ + enum { IsRowMajor = SparseMatrix::IsRowMajor }; + sparseMat.setZero(); + //sparseMat.reserve(int(refMat.rows()*refMat.cols()*density)); + sparseMat.reserve(VectorXi::Constant(IsRowMajor ? refMat.rows() : refMat.cols(), int((1.5*density)*(IsRowMajor?refMat.cols():refMat.rows())))); + + for(Index j=0; j(0,1) < density) ? 
internal::random() : Scalar(0); + if ((flags&ForceNonZeroDiag) && (i==j)) + { + // FIXME: the following is too conservative + v = internal::random()*Scalar(3.); + v = v*v; + if(numext::real(v)>0) v += Scalar(5); + else v -= Scalar(5); + } + if ((flags & MakeLowerTriangular) && aj>ai) + v = Scalar(0); + else if ((flags & MakeUpperTriangular) && ajpush_back(Matrix (ai,aj)); + } + else if (zeroCoords) + { + zeroCoords->push_back(Matrix (ai,aj)); + } + refMat(ai,aj) = v; + } + } + //sparseMat.finalize(); +} + +template void +initSparse(double density, + Matrix& refMat, + DynamicSparseMatrix& sparseMat, + int flags = 0, + std::vector >* zeroCoords = 0, + std::vector >* nonzeroCoords = 0) +{ + enum { IsRowMajor = DynamicSparseMatrix::IsRowMajor }; + sparseMat.setZero(); + sparseMat.reserve(int(refMat.rows()*refMat.cols()*density)); + for(int j=0; j(0,1) < density) ? internal::random() : Scalar(0); + if ((flags&ForceNonZeroDiag) && (i==j)) + { + v = internal::random()*Scalar(3.); + v = v*v + Scalar(5.); + } + if ((flags & MakeLowerTriangular) && aj>ai) + v = Scalar(0); + else if ((flags & MakeUpperTriangular) && ajpush_back(Matrix (ai,aj)); + } + else if (zeroCoords) + { + zeroCoords->push_back(Matrix (ai,aj)); + } + refMat(ai,aj) = v; + } + } + sparseMat.finalize(); +} + +template void +initSparse(double density, + Matrix& refVec, + SparseVector& sparseVec, + std::vector* zeroCoords = 0, + std::vector* nonzeroCoords = 0) +{ + sparseVec.reserve(int(refVec.size()*density)); + sparseVec.setZero(); + for(int i=0; i(0,1) < density) ? 
internal::random() : Scalar(0); + if (v!=Scalar(0)) + { + sparseVec.insertBack(i) = v; + if (nonzeroCoords) + nonzeroCoords->push_back(i); + } + else if (zeroCoords) + zeroCoords->push_back(i); + refVec[i] = v; + } +} + +template void +initSparse(double density, + Matrix& refVec, + SparseVector& sparseVec, + std::vector* zeroCoords = 0, + std::vector* nonzeroCoords = 0) +{ + sparseVec.reserve(int(refVec.size()*density)); + sparseVec.setZero(); + for(int i=0; i(0,1) < density) ? internal::random() : Scalar(0); + if (v!=Scalar(0)) + { + sparseVec.insertBack(i) = v; + if (nonzeroCoords) + nonzeroCoords->push_back(i); + } + else if (zeroCoords) + zeroCoords->push_back(i); + refVec[i] = v; + } +} + + +#include +#endif // EIGEN_TESTSPARSE_H diff --git a/thirdparty/eigen/test/sparseLM.cpp b/thirdparty/eigen/test/sparseLM.cpp new file mode 100644 index 000000000..8e148f9bc --- /dev/null +++ b/thirdparty/eigen/test/sparseLM.cpp @@ -0,0 +1,176 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+#include +#include +#include + +#include "main.h" +#include + +using namespace std; +using namespace Eigen; + +template +struct sparseGaussianTest : SparseFunctor +{ + typedef Matrix VectorType; + typedef SparseFunctor Base; + typedef typename Base::JacobianType JacobianType; + sparseGaussianTest(int inputs, int values) : SparseFunctor(inputs,values) + { } + + VectorType model(const VectorType& uv, VectorType& x) + { + VectorType y; //Change this to use expression template + int m = Base::values(); + int n = Base::inputs(); + eigen_assert(uv.size()%2 == 0); + eigen_assert(uv.size() == n); + eigen_assert(x.size() == m); + y.setZero(m); + int half = n/2; + VectorBlock u(uv, 0, half); + VectorBlock v(uv, half, half); + Scalar coeff; + for (int j = 0; j < m; j++) + { + for (int i = 0; i < half; i++) + { + coeff = (x(j)-i)/v(i); + coeff *= coeff; + if (coeff < 1. && coeff > 0.) + y(j) += u(i)*std::pow((1-coeff), 2); + } + } + return y; + } + void initPoints(VectorType& uv_ref, VectorType& x) + { + m_x = x; + m_y = this->model(uv_ref,x); + } + int operator()(const VectorType& uv, VectorType& fvec) + { + int m = Base::values(); + int n = Base::inputs(); + eigen_assert(uv.size()%2 == 0); + eigen_assert(uv.size() == n); + int half = n/2; + VectorBlock u(uv, 0, half); + VectorBlock v(uv, half, half); + fvec = m_y; + Scalar coeff; + for (int j = 0; j < m; j++) + { + for (int i = 0; i < half; i++) + { + coeff = (m_x(j)-i)/v(i); + coeff *= coeff; + if (coeff < 1. && coeff > 0.) 
+ fvec(j) -= u(i)*std::pow((1-coeff), 2); + } + } + return 0; + } + + int df(const VectorType& uv, JacobianType& fjac) + { + int m = Base::values(); + int n = Base::inputs(); + eigen_assert(n == uv.size()); + eigen_assert(fjac.rows() == m); + eigen_assert(fjac.cols() == n); + int half = n/2; + VectorBlock u(uv, 0, half); + VectorBlock v(uv, half, half); + Scalar coeff; + + //Derivatives with respect to u + for (int col = 0; col < half; col++) + { + for (int row = 0; row < m; row++) + { + coeff = (m_x(row)-col)/v(col); + coeff = coeff*coeff; + if(coeff < 1. && coeff > 0.) + { + fjac.coeffRef(row,col) = -(1-coeff)*(1-coeff); + } + } + } + //Derivatives with respect to v + for (int col = 0; col < half; col++) + { + for (int row = 0; row < m; row++) + { + coeff = (m_x(row)-col)/v(col); + coeff = coeff*coeff; + if(coeff < 1. && coeff > 0.) + { + fjac.coeffRef(row,col+half) = -4 * (u(col)/v(col))*coeff*(1-coeff); + } + } + } + return 0; + } + + VectorType m_x, m_y; //Data points +}; + + +template +void test_sparseLM_T() +{ + typedef Matrix VectorType; + + int inputs = 10; + int values = 2000; + sparseGaussianTest sparse_gaussian(inputs, values); + VectorType uv(inputs),uv_ref(inputs); + VectorType x(values); + // Generate the reference solution + uv_ref << -2, 1, 4 ,8, 6, 1.8, 1.2, 1.1, 1.9 , 3; + //Generate the reference data points + x.setRandom(); + x = 10*x; + x.array() += 10; + sparse_gaussian.initPoints(uv_ref, x); + + + // Generate the initial parameters + VectorBlock u(uv, 0, inputs/2); + VectorBlock v(uv, inputs/2, inputs/2); + v.setOnes(); + //Generate u or Solve for u from v + u.setOnes(); + + // Solve the optimization problem + LevenbergMarquardt > lm(sparse_gaussian); + int info; +// info = lm.minimize(uv); + + VERIFY_IS_EQUAL(info,1); + // Do a step by step solution and save the residual + int maxiter = 200; + int iter = 0; + MatrixXd Err(values, maxiter); + MatrixXd Mod(values, maxiter); + LevenbergMarquardtSpace::Status status; + status = 
lm.minimizeInit(uv); + if (status==LevenbergMarquardtSpace::ImproperInputParameters) + return ; + +} +void test_sparseLM() +{ + CALL_SUBTEST_1(test_sparseLM_T()); + + // CALL_SUBTEST_2(test_sparseLM_T()); +} diff --git a/thirdparty/eigen/test/sparse_basic.cpp b/thirdparty/eigen/test/sparse_basic.cpp new file mode 100644 index 000000000..2a3117b2b --- /dev/null +++ b/thirdparty/eigen/test/sparse_basic.cpp @@ -0,0 +1,639 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// Copyright (C) 2008 Daniel Gomez Ferro +// Copyright (C) 2013 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +static long g_realloc_count = 0; +#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++; + +#include "sparse.h" + +template void sparse_basic(const SparseMatrixType& ref) +{ + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef Matrix Vector2; + + const Index rows = ref.rows(); + const Index cols = ref.cols(); + //const Index inner = ref.innerSize(); + //const Index outer = ref.outerSize(); + + typedef typename SparseMatrixType::Scalar Scalar; + enum { Flags = SparseMatrixType::Flags }; + + double density = (std::max)(8./(rows*cols), 0.01); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + Scalar eps = 1e-6; + + Scalar s1 = internal::random(); + { + SparseMatrixType m(rows, cols); + DenseMatrix refMat = DenseMatrix::Zero(rows, cols); + DenseVector vec1 = DenseVector::Random(rows); + + std::vector zeroCoords; + std::vector nonzeroCoords; + initSparse(density, refMat, m, 0, &zeroCoords, &nonzeroCoords); + + // test coeff and coeffRef + for (std::size_t i=0; i >::value) + VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[i].x(),zeroCoords[i].y()) = 5 ); + } + 
VERIFY_IS_APPROX(m, refMat); + + if(!nonzeroCoords.empty()) { + m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); + refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); + } + + VERIFY_IS_APPROX(m, refMat); + + // test assertion + VERIFY_RAISES_ASSERT( m.coeffRef(-1,1) = 0 ); + VERIFY_RAISES_ASSERT( m.coeffRef(0,m.cols()) = 0 ); + } + + // test insert (inner random) + { + DenseMatrix m1(rows,cols); + m1.setZero(); + SparseMatrixType m2(rows,cols); + bool call_reserve = internal::random()%2; + Index nnz = internal::random(1,int(rows)/2); + if(call_reserve) + { + if(internal::random()%2) + m2.reserve(VectorXi::Constant(m2.outerSize(), int(nnz))); + else + m2.reserve(m2.outerSize() * nnz); + } + g_realloc_count = 0; + for (Index j=0; j(0,rows-1); + if (m1.coeff(i,j)==Scalar(0)) + m2.insert(i,j) = m1(i,j) = internal::random(); + } + } + + if(call_reserve && !SparseMatrixType::IsRowMajor) + { + VERIFY(g_realloc_count==0); + } + + m2.finalize(); + VERIFY_IS_APPROX(m2,m1); + } + + // test insert (fully random) + { + DenseMatrix m1(rows,cols); + m1.setZero(); + SparseMatrixType m2(rows,cols); + if(internal::random()%2) + m2.reserve(VectorXi::Constant(m2.outerSize(), 2)); + for (int k=0; k(0,rows-1); + Index j = internal::random(0,cols-1); + if ((m1.coeff(i,j)==Scalar(0)) && (internal::random()%2)) + m2.insert(i,j) = m1(i,j) = internal::random(); + else + { + Scalar v = internal::random(); + m2.coeffRef(i,j) += v; + m1(i,j) += v; + } + } + VERIFY_IS_APPROX(m2,m1); + } + + // test insert (un-compressed) + for(int mode=0;mode<4;++mode) + { + DenseMatrix m1(rows,cols); + m1.setZero(); + SparseMatrixType m2(rows,cols); + VectorXi r(VectorXi::Constant(m2.outerSize(), ((mode%2)==0) ? 
int(m2.innerSize()) : std::max(1,int(m2.innerSize())/8))); + m2.reserve(r); + for (Index k=0; k(0,rows-1); + Index j = internal::random(0,cols-1); + if (m1.coeff(i,j)==Scalar(0)) + m2.insert(i,j) = m1(i,j) = internal::random(); + if(mode==3) + m2.reserve(r); + } + if(internal::random()%2) + m2.makeCompressed(); + VERIFY_IS_APPROX(m2,m1); + } + + // test basic computations + { + DenseMatrix refM1 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM2 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM3 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM4 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m1(rows, cols); + SparseMatrixType m2(rows, cols); + SparseMatrixType m3(rows, cols); + SparseMatrixType m4(rows, cols); + initSparse(density, refM1, m1); + initSparse(density, refM2, m2); + initSparse(density, refM3, m3); + initSparse(density, refM4, m4); + + if(internal::random()) + m1.makeCompressed(); + + VERIFY_IS_APPROX(m1*s1, refM1*s1); + VERIFY_IS_APPROX(m1+m2, refM1+refM2); + VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3); + VERIFY_IS_APPROX(m3.cwiseProduct(m1+m2), refM3.cwiseProduct(refM1+refM2)); + VERIFY_IS_APPROX(m1*s1-m2, refM1*s1-refM2); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.row(0).dot(refM2.row(0))); + else + VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.col(0)), refM1.col(0).dot(refM2.col(0))); + + DenseVector rv = DenseVector::Random(m1.cols()); + DenseVector cv = DenseVector::Random(m1.rows()); + Index r = internal::random(0,m1.rows()-2); + Index c = internal::random(0,m1.cols()-1); + VERIFY_IS_APPROX(( m1.template block<1,Dynamic>(r,0,1,m1.cols()).dot(rv)) , refM1.row(r).dot(rv)); + VERIFY_IS_APPROX(m1.row(r).dot(rv), refM1.row(r).dot(rv)); + VERIFY_IS_APPROX(m1.col(c).dot(cv), refM1.col(c).dot(cv)); + + VERIFY_IS_APPROX(m1.conjugate(), refM1.conjugate()); + VERIFY_IS_APPROX(m1.real(), refM1.real()); + + refM4.setRandom(); + // sparse cwise* dense + VERIFY_IS_APPROX(m3.cwiseProduct(refM4), 
refM3.cwiseProduct(refM4)); + // dense cwise* sparse + VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3)); +// VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4); + + VERIFY_IS_APPROX(refM4 + m3, refM4 + refM3); + VERIFY_IS_APPROX(m3 + refM4, refM3 + refM4); + VERIFY_IS_APPROX(refM4 - m3, refM4 - refM3); + VERIFY_IS_APPROX(m3 - refM4, refM3 - refM4); + + VERIFY_IS_APPROX(m1.sum(), refM1.sum()); + + VERIFY_IS_APPROX(m1*=s1, refM1*=s1); + VERIFY_IS_APPROX(m1/=s1, refM1/=s1); + + VERIFY_IS_APPROX(m1+=m2, refM1+=refM2); + VERIFY_IS_APPROX(m1-=m2, refM1-=refM2); + + // test aliasing + VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1)); + VERIFY_IS_APPROX((m1 = m1.transpose()), (refM1 = refM1.transpose().eval())); + VERIFY_IS_APPROX((m1 = -m1.transpose()), (refM1 = -refM1.transpose().eval())); + VERIFY_IS_APPROX((m1 += -m1), (refM1 += -refM1)); + + if(m1.isCompressed()) + { + VERIFY_IS_APPROX(m1.coeffs().sum(), m1.sum()); + m1.coeffs() += s1; + for(Index j = 0; j SpBool; + SpBool mb1 = m1.real().template cast(); + SpBool mb2 = m2.real().template cast(); + VERIFY_IS_EQUAL(mb1.template cast().sum(), refM1.real().template cast().count()); + VERIFY_IS_EQUAL((mb1 && mb2).template cast().sum(), (refM1.real().template cast() && refM2.real().template cast()).count()); + VERIFY_IS_EQUAL((mb1 || mb2).template cast().sum(), (refM1.real().template cast() || refM2.real().template cast()).count()); + SpBool mb3 = mb1 && mb2; + if(mb1.coeffs().all() && mb2.coeffs().all()) + { + VERIFY_IS_EQUAL(mb3.nonZeros(), (refM1.real().template cast() && refM2.real().template cast()).count()); + } + } + } + + // test reverse iterators + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + std::vector ref_value(m2.innerSize()); + std::vector ref_index(m2.innerSize()); + if(internal::random()) + m2.makeCompressed(); + for(Index j = 0; j(density, refMat2, m2); + VERIFY_IS_APPROX(m2.transpose().eval(), 
refMat2.transpose().eval()); + VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose()); + + VERIFY_IS_APPROX(SparseMatrixType(m2.adjoint()), refMat2.adjoint()); + + // check isApprox handles opposite storage order + typename Transpose::PlainObject m3(m2); + VERIFY(m2.isApprox(m3)); + } + + // test prune + { + SparseMatrixType m2(rows, cols); + DenseMatrix refM2(rows, cols); + refM2.setZero(); + int countFalseNonZero = 0; + int countTrueNonZero = 0; + m2.reserve(VectorXi::Constant(m2.outerSize(), int(m2.innerSize()))); + for (Index j=0; j(0,1); + if (x<0.1f) + { + // do nothing + } + else if (x<0.5f) + { + countFalseNonZero++; + m2.insert(i,j) = Scalar(0); + } + else + { + countTrueNonZero++; + m2.insert(i,j) = Scalar(1); + refM2(i,j) = Scalar(1); + } + } + } + if(internal::random()) + m2.makeCompressed(); + VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros()); + if(countTrueNonZero>0) + VERIFY_IS_APPROX(m2, refM2); + m2.prune(Scalar(1)); + VERIFY(countTrueNonZero==m2.nonZeros()); + VERIFY_IS_APPROX(m2, refM2); + } + + // test setFromTriplets + { + typedef Triplet TripletType; + std::vector triplets; + Index ntriplets = rows*cols; + triplets.reserve(ntriplets); + DenseMatrix refMat_sum = DenseMatrix::Zero(rows,cols); + DenseMatrix refMat_prod = DenseMatrix::Zero(rows,cols); + DenseMatrix refMat_last = DenseMatrix::Zero(rows,cols); + + for(Index i=0;i(0,StorageIndex(rows-1)); + StorageIndex c = internal::random(0,StorageIndex(cols-1)); + Scalar v = internal::random(); + triplets.push_back(TripletType(r,c,v)); + refMat_sum(r,c) += v; + if(std::abs(refMat_prod(r,c))==0) + refMat_prod(r,c) = v; + else + refMat_prod(r,c) *= v; + refMat_last(r,c) = v; + } + SparseMatrixType m(rows,cols); + m.setFromTriplets(triplets.begin(), triplets.end()); + VERIFY_IS_APPROX(m, refMat_sum); + + m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies()); + VERIFY_IS_APPROX(m, refMat_prod); +#if (defined(__cplusplus) && __cplusplus >= 201103L) + 
m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; }); + VERIFY_IS_APPROX(m, refMat_last); +#endif + } + + // test Map + { + DenseMatrix refMat2(rows, cols), refMat3(rows, cols); + SparseMatrixType m2(rows, cols), m3(rows, cols); + initSparse(density, refMat2, m2); + initSparse(density, refMat3, m3); + { + Map mapMat2(m2.rows(), m2.cols(), m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr()); + Map mapMat3(m3.rows(), m3.cols(), m3.nonZeros(), m3.outerIndexPtr(), m3.innerIndexPtr(), m3.valuePtr(), m3.innerNonZeroPtr()); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + } + { + MappedSparseMatrix mapMat2(m2.rows(), m2.cols(), m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr()); + MappedSparseMatrix mapMat3(m3.rows(), m3.cols(), m3.nonZeros(), m3.outerIndexPtr(), m3.innerIndexPtr(), m3.valuePtr(), m3.innerNonZeroPtr()); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + } + + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + m2.coeffRef(i,j) = 123; + if(internal::random()) + m2.makeCompressed(); + Map mapMat2(rows, cols, m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr()); + VERIFY_IS_EQUAL(m2.coeff(i,j),Scalar(123)); + VERIFY_IS_EQUAL(mapMat2.coeff(i,j),Scalar(123)); + mapMat2.coeffRef(i,j) = -123; + VERIFY_IS_EQUAL(m2.coeff(i,j),Scalar(-123)); + } + + // test triangularView + { + DenseMatrix refMat2(rows, cols), refMat3(rows, cols); + SparseMatrixType m2(rows, cols), m3(rows, cols); + initSparse(density, refMat2, m2); + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + + { + refMat3 = 
refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + } + + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + + // check sparse-traingular to dense + refMat3 = m2.template triangularView(); + VERIFY_IS_APPROX(refMat3, DenseMatrix(refMat2.template triangularView())); + } + + // test selfadjointView + if(!SparseMatrixType::IsRowMajor) + { + DenseMatrix refMat2(rows, rows), refMat3(rows, rows); + SparseMatrixType m2(rows, rows), m3(rows, rows); + initSparse(density, refMat2, m2); + refMat3 = refMat2.template selfadjointView(); + m3 = m2.template selfadjointView(); + VERIFY_IS_APPROX(m3, refMat3); + + // selfadjointView only works for square matrices: + SparseMatrixType m4(rows, rows+1); + VERIFY_RAISES_ASSERT(m4.template selfadjointView()); + VERIFY_RAISES_ASSERT(m4.template selfadjointView()); + } + + // test sparseView + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); + SparseMatrixType m2(rows, rows); + initSparse(density, refMat2, m2); + VERIFY_IS_APPROX(m2.eval(), refMat2.sparseView().eval()); + + // sparse view on expressions: + VERIFY_IS_APPROX((s1*m2).eval(), (s1*refMat2).sparseView().eval()); + VERIFY_IS_APPROX((m2+m2).eval(), (refMat2+refMat2).sparseView().eval()); + VERIFY_IS_APPROX((m2*m2).eval(), (refMat2.lazyProduct(refMat2)).sparseView().eval()); + VERIFY_IS_APPROX((m2*m2).eval(), (refMat2*refMat2).sparseView().eval()); + } + + // test diagonal + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + VERIFY_IS_APPROX(m2.diagonal(), refMat2.diagonal().eval()); + VERIFY_IS_APPROX(const_cast(m2).diagonal(), 
refMat2.diagonal().eval()); + + initSparse(density, refMat2, m2, ForceNonZeroDiag); + m2.diagonal() += refMat2.diagonal(); + refMat2.diagonal() += refMat2.diagonal(); + VERIFY_IS_APPROX(m2, refMat2); + } + + // test diagonal to sparse + { + DenseVector d = DenseVector::Random(rows); + DenseMatrix refMat2 = d.asDiagonal(); + SparseMatrixType m2(rows, rows); + m2 = d.asDiagonal(); + VERIFY_IS_APPROX(m2, refMat2); + SparseMatrixType m3(d.asDiagonal()); + VERIFY_IS_APPROX(m3, refMat2); + refMat2 += d.asDiagonal(); + m2 += d.asDiagonal(); + VERIFY_IS_APPROX(m2, refMat2); + } + + // test conservative resize + { + std::vector< std::pair > inc; + if(rows > 3 && cols > 2) + inc.push_back(std::pair(-3,-2)); + inc.push_back(std::pair(0,0)); + inc.push_back(std::pair(3,2)); + inc.push_back(std::pair(3,0)); + inc.push_back(std::pair(0,3)); + + for(size_t i = 0; i< inc.size(); i++) { + StorageIndex incRows = inc[i].first; + StorageIndex incCols = inc[i].second; + SparseMatrixType m1(rows, cols); + DenseMatrix refMat1 = DenseMatrix::Zero(rows, cols); + initSparse(density, refMat1, m1); + + m1.conservativeResize(rows+incRows, cols+incCols); + refMat1.conservativeResize(rows+incRows, cols+incCols); + if (incRows > 0) refMat1.bottomRows(incRows).setZero(); + if (incCols > 0) refMat1.rightCols(incCols).setZero(); + + VERIFY_IS_APPROX(m1, refMat1); + + // Insert new values + if (incRows > 0) + m1.insert(m1.rows()-1, 0) = refMat1(refMat1.rows()-1, 0) = 1; + if (incCols > 0) + m1.insert(0, m1.cols()-1) = refMat1(0, refMat1.cols()-1) = 1; + + VERIFY_IS_APPROX(m1, refMat1); + + + } + } + + // test Identity matrix + { + DenseMatrix refMat1 = DenseMatrix::Identity(rows, rows); + SparseMatrixType m1(rows, rows); + m1.setIdentity(); + VERIFY_IS_APPROX(m1, refMat1); + for(int k=0; k(0,rows-1); + Index j = internal::random(0,rows-1); + Scalar v = internal::random(); + m1.coeffRef(i,j) = v; + refMat1.coeffRef(i,j) = v; + VERIFY_IS_APPROX(m1, refMat1); + if(internal::random(0,10)<2) + 
m1.makeCompressed(); + } + m1.setIdentity(); + refMat1.setIdentity(); + VERIFY_IS_APPROX(m1, refMat1); + } + + // test array/vector of InnerIterator + { + typedef typename SparseMatrixType::InnerIterator IteratorType; + + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + IteratorType static_array[2]; + static_array[0] = IteratorType(m2,0); + static_array[1] = IteratorType(m2,m2.outerSize()-1); + VERIFY( static_array[0] || m2.innerVector(static_array[0].outer()).nonZeros() == 0 ); + VERIFY( static_array[1] || m2.innerVector(static_array[1].outer()).nonZeros() == 0 ); + if(static_array[0] && static_array[1]) + { + ++(static_array[1]); + static_array[1] = IteratorType(m2,0); + VERIFY( static_array[1] ); + VERIFY( static_array[1].index() == static_array[0].index() ); + VERIFY( static_array[1].outer() == static_array[0].outer() ); + VERIFY( static_array[1].value() == static_array[0].value() ); + } + + std::vector iters(2); + iters[0] = IteratorType(m2,0); + iters[1] = IteratorType(m2,m2.outerSize()-1); + } +} + + +template +void big_sparse_triplet(Index rows, Index cols, double density) { + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef typename SparseMatrixType::Scalar Scalar; + typedef Triplet TripletType; + std::vector triplets; + double nelements = density * rows*cols; + VERIFY(nelements>=0 && nelements < NumTraits::highest()); + Index ntriplets = Index(nelements); + triplets.reserve(ntriplets); + Scalar sum = Scalar(0); + for(Index i=0;i(0,rows-1); + Index c = internal::random(0,cols-1); + Scalar v = internal::random(); + triplets.push_back(TripletType(r,c,v)); + sum += v; + } + SparseMatrixType m(rows,cols); + m.setFromTriplets(triplets.begin(), triplets.end()); + VERIFY(m.nonZeros() <= ntriplets); + VERIFY_IS_APPROX(sum, m.sum()); +} + + +void test_sparse_basic() +{ + for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,200), c = 
Eigen::internal::random(1,200); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_basic(SparseMatrix(1, 1)) )); + CALL_SUBTEST_1(( sparse_basic(SparseMatrix(8, 8)) )); + CALL_SUBTEST_2(( sparse_basic(SparseMatrix, ColMajor>(r, c)) )); + CALL_SUBTEST_2(( sparse_basic(SparseMatrix, RowMajor>(r, c)) )); + CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); + + r = Eigen::internal::random(1,100); + c = Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); + } + + // Regression test for bug 900: (manually insert higher values here, if you have enough RAM): + CALL_SUBTEST_3((big_sparse_triplet >(10000, 10000, 0.125))); + CALL_SUBTEST_4((big_sparse_triplet >(10000, 10000, 0.125))); + + // Regression test for bug 1105 +#ifdef EIGEN_TEST_PART_7 + { + int n = Eigen::internal::random(200,600); + SparseMatrix,0, long> mat(n, n); + std::complex val; + + for(int i=0; i +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "sparse.h" + +template void sparse_block(const SparseMatrixType& ref) +{ + const Index rows = ref.rows(); + const Index cols = ref.cols(); + const Index inner = ref.innerSize(); + const Index outer = ref.outerSize(); + + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + + double density = (std::max)(8./(rows*cols), 0.01); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + typedef Matrix RowDenseVector; + + Scalar s1 = internal::random(); + { + SparseMatrixType m(rows, cols); + DenseMatrix refMat = DenseMatrix::Zero(rows, cols); + initSparse(density, refMat, m); + + VERIFY_IS_APPROX(m, refMat); + + // test InnerIterators and Block expressions + for (int t=0; t<10; ++t) + { + Index j = internal::random(0,cols-2); + Index i = internal::random(0,rows-2); + Index w = internal::random(1,cols-j); + Index h = internal::random(1,rows-i); + + VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); + for(Index c=0; c(density, refMat2, m2); + Index j0 = internal::random(0,outer-1); + Index j1 = internal::random(0,outer-1); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0)); + else + VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1)); + else + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); + + SparseMatrixType m3(rows,cols); + m3.reserve(VectorXi::Constant(outer,int(inner/2))); + for(Index j=0; j(k+1); + for(Index j=0; j<(std::min)(outer, inner); ++j) + { + VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); + if(j>0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + m3.makeCompressed(); + for(Index j=0; j<(std::min)(outer, inner); ++j) + { + VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); + if(j>0) + 
VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + + VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); + +// m2.innerVector(j0) = 2*m2.innerVector(j1); +// refMat2.col(j0) = 2*refMat2.col(j1); +// VERIFY_IS_APPROX(m2, refMat2); + } + + // test innerVectors() + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + if(internal::random(0,1)>0.5f) m2.makeCompressed(); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + VERIFY_IS_APPROX(m2, refMat2); + + VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros()); + + m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); + if(SparseMatrixType::IsRowMajor) + refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); + else + refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); + + VERIFY_IS_APPROX(m2, refMat2); + } + + // test generic blocks + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); + else 
+ VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), + refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + Index i = internal::random(0,m2.outerSize()-1); + if(SparseMatrixType::IsRowMajor) { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.row(i) = refMat2.row(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } else { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.col(i) = refMat2.col(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } + + Index r0 = internal::random(0,rows-2); + Index c0 = internal::random(0,cols-2); + Index r1 = internal::random(1,rows-r0); + Index c1 = internal::random(1,cols-c0); + + VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0)); + VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0)); + + VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0)); + VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0)); + + VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1)); + VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1)); + } +} + +void test_sparse_block() +{ + for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,200), c = Eigen::internal::random(1,200); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(1, 1)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(8, 8)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, ColMajor>(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, RowMajor>(r, c)) )); + + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + + r = Eigen::internal::random(1,100); + c = 
Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + } +} diff --git a/thirdparty/eigen/test/sparse_permutations.cpp b/thirdparty/eigen/test/sparse_permutations.cpp new file mode 100644 index 000000000..b82cceff8 --- /dev/null +++ b/thirdparty/eigen/test/sparse_permutations.cpp @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +static long int nb_transposed_copies; +#define EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN {nb_transposed_copies++;} +#define VERIFY_TRANSPOSITION_COUNT(XPR,N) {\ + nb_transposed_copies = 0; \ + XPR; \ + if(nb_transposed_copies!=N) std::cerr << "nb_transposed_copies == " << nb_transposed_copies << "\n"; \ + VERIFY( (#XPR) && nb_transposed_copies==N ); \ + } + +#include "sparse.h" + +template +bool is_sorted(const T& mat) { + for(Index k = 0; k=it.index()) + return false; + prev = it.index(); + } + } + return true; +} + +template +typename internal::nested_eval::type eval(const T &xpr) +{ + VERIFY( int(internal::nested_eval::type::Flags&RowMajorBit) == int(internal::evaluator::Flags&RowMajorBit) ); + return xpr; +} + +template void sparse_permutations(const SparseMatrixType& ref) +{ + const Index rows = ref.rows(); + const Index cols = ref.cols(); + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef SparseMatrix OtherSparseMatrixType; + typedef Matrix DenseMatrix; + typedef Matrix VectorI; +// bool IsRowMajor1 = SparseMatrixType::IsRowMajor; +// bool 
IsRowMajor2 = OtherSparseMatrixType::IsRowMajor; + + double density = (std::max)(8./(rows*cols), 0.01); + + SparseMatrixType mat(rows, cols), up(rows,cols), lo(rows,cols); + OtherSparseMatrixType res; + DenseMatrix mat_d = DenseMatrix::Zero(rows, cols), up_sym_d, lo_sym_d, res_d; + + initSparse(density, mat_d, mat, 0); + + up = mat.template triangularView(); + lo = mat.template triangularView(); + + up_sym_d = mat_d.template selfadjointView(); + lo_sym_d = mat_d.template selfadjointView(); + + VERIFY_IS_APPROX(mat, mat_d); + VERIFY_IS_APPROX(up, DenseMatrix(mat_d.template triangularView())); + VERIFY_IS_APPROX(lo, DenseMatrix(mat_d.template triangularView())); + + PermutationMatrix p, p_null; + VectorI pi; + randomPermutationVector(pi, cols); + p.indices() = pi; + + VERIFY( is_sorted( ::eval(mat*p) )); + VERIFY( is_sorted( res = mat*p )); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p), 0); + //VERIFY_TRANSPOSITION_COUNT( res = mat*p, IsRowMajor ? 1 : 0 ); + res_d = mat_d*p; + VERIFY(res.isApprox(res_d) && "mat*p"); + + VERIFY( is_sorted( ::eval(p*mat) )); + VERIFY( is_sorted( res = p*mat )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p*mat), 0); + res_d = p*mat_d; + VERIFY(res.isApprox(res_d) && "p*mat"); + + VERIFY( is_sorted( (mat*p).eval() )); + VERIFY( is_sorted( res = mat*p.inverse() )); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p.inverse()), 0); + res_d = mat*p.inverse(); + VERIFY(res.isApprox(res_d) && "mat*inv(p)"); + + VERIFY( is_sorted( (p*mat+p*mat).eval() )); + VERIFY( is_sorted( res = p.inverse()*mat )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p.inverse()*mat), 0); + res_d = p.inverse()*mat_d; + VERIFY(res.isApprox(res_d) && "inv(p)*mat"); + + VERIFY( is_sorted( (p * mat * p.inverse()).eval() )); + VERIFY( is_sorted( res = mat.twistedBy(p) )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p * mat * p.inverse()), 0); + res_d = (p * mat_d) * p.inverse(); + VERIFY(res.isApprox(res_d) && "p*mat*inv(p)"); + + + VERIFY( is_sorted( res = mat.template 
selfadjointView().twistedBy(p_null) )); + res_d = up_sym_d; + VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); + + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); + res_d = lo_sym_d; + VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); + + + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p_null) )); + res_d = up_sym_d; + VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); + + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p_null) )); + res_d = lo_sym_d; + VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); + + + VERIFY( is_sorted( res = mat.template selfadjointView() )); + res_d = up_sym_d; + VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); + + VERIFY( is_sorted( res = mat.template selfadjointView() )); + res_d = lo_sym_d; + VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); + + VERIFY( is_sorted( res = up.template selfadjointView() )); + res_d = up_sym_d; + VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); + + VERIFY( is_sorted( res = lo.template selfadjointView() )); + res_d = lo_sym_d; + VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); + + + res.template selfadjointView() = mat.template selfadjointView(); + res_d = up_sym_d.template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint upper to upper"); + + res.template selfadjointView() = mat.template selfadjointView(); + res_d = up_sym_d.template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint upper to lower"); + + res.template selfadjointView() = mat.template selfadjointView(); + res_d = lo_sym_d.template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint lower to upper"); + + res.template selfadjointView() = mat.template selfadjointView(); + res_d = lo_sym_d.template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint lower to lower"); + + + + res.template selfadjointView() = mat.template 
selfadjointView().twistedBy(p); + res_d = ((p * up_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint upper twisted to upper"); + + res.template selfadjointView() = mat.template selfadjointView().twistedBy(p); + res_d = ((p * lo_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint lower twisted to upper"); + + res.template selfadjointView() = mat.template selfadjointView().twistedBy(p); + res_d = ((p * lo_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint lower twisted to lower"); + + res.template selfadjointView() = mat.template selfadjointView().twistedBy(p); + res_d = ((p * up_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "full selfadjoint upper twisted to lower"); + + + res.template selfadjointView() = up.template selfadjointView().twistedBy(p); + res_d = ((p * up_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to upper"); + + res.template selfadjointView() = lo.template selfadjointView().twistedBy(p); + res_d = ((p * lo_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "lower selfadjoint twisted to upper"); + + res.template selfadjointView() = lo.template selfadjointView().twistedBy(p); + res_d = ((p * lo_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "lower selfadjoint twisted to lower"); + + res.template selfadjointView() = up.template selfadjointView().twistedBy(p); + res_d = ((p * up_sym_d) * p.inverse()).eval().template triangularView(); + VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to lower"); + + + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); + res_d = (p * up_sym_d) * p.inverse(); + VERIFY(res.isApprox(res_d) && "full selfadjoint upper twisted to full"); + + VERIFY( 
is_sorted( res = mat.template selfadjointView().twistedBy(p) )); + res_d = (p * lo_sym_d) * p.inverse(); + VERIFY(res.isApprox(res_d) && "full selfadjoint lower twisted to full"); + + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p) )); + res_d = (p * up_sym_d) * p.inverse(); + VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to full"); + + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p) )); + res_d = (p * lo_sym_d) * p.inverse(); + VERIFY(res.isApprox(res_d) && "lower selfadjoint twisted to full"); +} + +template void sparse_permutations_all(int size) +{ + CALL_SUBTEST(( sparse_permutations(SparseMatrix(size,size)) )); + CALL_SUBTEST(( sparse_permutations(SparseMatrix(size,size)) )); + CALL_SUBTEST(( sparse_permutations(SparseMatrix(size,size)) )); + CALL_SUBTEST(( sparse_permutations(SparseMatrix(size,size)) )); +} + +void test_sparse_permutations() +{ + for(int i = 0; i < g_repeat; i++) { + int s = Eigen::internal::random(1,50); + CALL_SUBTEST_1(( sparse_permutations_all(s) )); + CALL_SUBTEST_2(( sparse_permutations_all >(s) )); + } + + VERIFY((internal::is_same,OnTheRight,false,SparseShape>::ReturnType, + internal::nested_eval,PermutationMatrix,AliasFreeProduct>,1>::type>::value)); + + VERIFY((internal::is_same,OnTheLeft,false,SparseShape>::ReturnType, + internal::nested_eval,SparseMatrix,AliasFreeProduct>,1>::type>::value)); +} diff --git a/thirdparty/eigen/test/sparse_product.cpp b/thirdparty/eigen/test/sparse_product.cpp new file mode 100644 index 000000000..c7c93373d --- /dev/null +++ b/thirdparty/eigen/test/sparse_product.cpp @@ -0,0 +1,377 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +static long int nb_temporaries; + +inline void on_temporary_creation() { + // here's a great place to set a breakpoint when debugging failures in this test! + nb_temporaries++; +} + +#define EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN { on_temporary_creation(); } + +#include "sparse.h" + +#define VERIFY_EVALUATION_COUNT(XPR,N) {\ + nb_temporaries = 0; \ + CALL_SUBTEST( XPR ); \ + if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ + VERIFY( (#XPR) && nb_temporaries==N ); \ + } + + + +template void sparse_product() +{ + typedef typename SparseMatrixType::StorageIndex StorageIndex; + Index n = 100; + const Index rows = internal::random(1,n); + const Index cols = internal::random(1,n); + const Index depth = internal::random(1,n); + typedef typename SparseMatrixType::Scalar Scalar; + enum { Flags = SparseMatrixType::Flags }; + + double density = (std::max)(8./(rows*cols), 0.2); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + typedef Matrix RowDenseVector; + typedef SparseVector ColSpVector; + typedef SparseVector RowSpVector; + + Scalar s1 = internal::random(); + Scalar s2 = internal::random(); + + // test matrix-matrix product + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, depth); + DenseMatrix refMat2t = DenseMatrix::Zero(depth, rows); + DenseMatrix refMat3 = DenseMatrix::Zero(depth, cols); + DenseMatrix refMat3t = DenseMatrix::Zero(cols, depth); + DenseMatrix refMat4 = DenseMatrix::Zero(rows, cols); + DenseMatrix refMat4t = DenseMatrix::Zero(cols, rows); + DenseMatrix refMat5 = DenseMatrix::Random(depth, cols); + DenseMatrix refMat6 = DenseMatrix::Random(rows, rows); + DenseMatrix dm4 = DenseMatrix::Zero(rows, rows); +// DenseVector dv1 = DenseVector::Random(rows); + SparseMatrixType m2 (rows, depth); + SparseMatrixType m2t(depth, rows); + SparseMatrixType m3 (depth, cols); + SparseMatrixType m3t(cols, depth); + SparseMatrixType m4 (rows, cols); + SparseMatrixType m4t(cols, rows); + SparseMatrixType m6(rows, rows); + 
initSparse(density, refMat2, m2); + initSparse(density, refMat2t, m2t); + initSparse(density, refMat3, m3); + initSparse(density, refMat3t, m3t); + initSparse(density, refMat4, m4); + initSparse(density, refMat4t, m4t); + initSparse(density, refMat6, m6); + +// int c = internal::random(0,depth-1); + + // sparse * sparse + VERIFY_IS_APPROX(m4=m2*m3, refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(m4=m2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(m4=m2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(m4=m2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose()); + + VERIFY_IS_APPROX(m4 = m2*m3/s1, refMat4 = refMat2*refMat3/s1); + VERIFY_IS_APPROX(m4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); + VERIFY_IS_APPROX(m4 = s2*m2*m3*s1, refMat4 = s2*refMat2*refMat3*s1); + VERIFY_IS_APPROX(m4 = (m2+m2)*m3, refMat4 = (refMat2+refMat2)*refMat3); + VERIFY_IS_APPROX(m4 = m2*m3.leftCols(cols/2), refMat4 = refMat2*refMat3.leftCols(cols/2)); + VERIFY_IS_APPROX(m4 = m2*(m3+m3).leftCols(cols/2), refMat4 = refMat2*(refMat3+refMat3).leftCols(cols/2)); + + VERIFY_IS_APPROX(m4=(m2*m3).pruned(0), refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(m4=(m2t.transpose()*m3).pruned(0), refMat4=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose()); + + // make sure the right product implementation is called: + if((!SparseMatrixType::IsRowMajor) && m2.rows()<=m3.cols()) + { + VERIFY_EVALUATION_COUNT(m4 = m2*m3, 3); // 1 temp for the result + 2 for transposing and get a sorted result. 
+ VERIFY_EVALUATION_COUNT(m4 = (m2*m3).pruned(0), 1); + VERIFY_EVALUATION_COUNT(m4 = (m2*m3).eval().pruned(0), 4); + } + + // and that pruning is effective: + { + DenseMatrix Ad(2,2); + Ad << -1, 1, 1, 1; + SparseMatrixType As(Ad.sparseView()), B(2,2); + VERIFY_IS_EQUAL( (As*As.transpose()).eval().nonZeros(), 4); + VERIFY_IS_EQUAL( (Ad*Ad.transpose()).eval().sparseView().eval().nonZeros(), 2); + VERIFY_IS_EQUAL( (As*As.transpose()).pruned(1e-6).eval().nonZeros(), 2); + } + + // dense ?= sparse * sparse + VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*m3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3, refMat4 =refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3, refMat4+=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3, refMat4-=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3t.transpose(), refMat4 =refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3t.transpose(), refMat4+=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3t.transpose(), refMat4-=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 =m2*m3t.transpose(), refMat4 =refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2*m3t.transpose(), refMat4+=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2*m3t.transpose(), refMat4-=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); + + // test aliasing + m4 = m2; refMat4 = refMat2; + VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3); + + // sparse * dense matrix + VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=m2*refMat3t.transpose(), refMat4=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3, refMat4=refMat2t.transpose()*refMat3); + 
VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose()); + + VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=dm4+m2*refMat3, refMat4=refMat4+refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=m2*refMat3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*refMat3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()+=m2*refMat3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()-=m2*refMat3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3)); + VERIFY_IS_APPROX(dm4=m2t.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2t.transpose()*(refMat3+refMat5)*0.5); + + // sparse * dense vector + VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3.col(0), refMat4.col(0)=refMat2*refMat3.col(0)); + VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3t.transpose().col(0), refMat4.col(0)=refMat2*refMat3t.transpose().col(0)); + VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3.col(0), refMat4.col(0)=refMat2t.transpose()*refMat3.col(0)); + VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3t.transpose().col(0), refMat4.col(0)=refMat2t.transpose()*refMat3t.transpose().col(0)); + + // dense * sparse + VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=dm4+refMat2*m3, refMat4=refMat4+refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=refMat2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=refMat2*m3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()+=refMat2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()-=refMat2*m3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=refMat2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose()); + + // sparse * dense and dense * sparse outer product + { + Index c = 
internal::random(0,depth-1); + Index r = internal::random(0,rows-1); + Index c1 = internal::random(0,cols-1); + Index r1 = internal::random(0,depth-1); + DenseMatrix dm5 = DenseMatrix::Random(depth, cols); + + VERIFY_IS_APPROX( m4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX( m4=m2.middleCols(c,1)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose()); + + VERIFY_IS_APPROX(m4=dm5.col(c1)*m2.col(c).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(m4=dm5.col(c1)*m2.middleCols(c,1).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.col(c1)*m2.col(c).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose()); + + VERIFY_IS_APPROX( m4=dm5.row(r1).transpose()*m2.col(c).transpose(), refMat4=dm5.row(r1).transpose()*refMat2.col(c).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.row(r1).transpose()*m2.col(c).transpose(), refMat4=dm5.row(r1).transpose()*refMat2.col(c).transpose()); + + VERIFY_IS_APPROX( m4=m2.row(r).transpose()*dm5.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX( m4=m2.middleRows(r,1).transpose()*dm5.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=m2.row(r).transpose()*dm5.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*dm5.col(c1).transpose()); + + 
VERIFY_IS_APPROX( m4=dm5.col(c1)*m2.row(r), refMat4=dm5.col(c1)*refMat2.row(r)); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX( m4=dm5.col(c1)*m2.middleRows(r,1), refMat4=dm5.col(c1)*refMat2.row(r)); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.col(c1)*m2.row(r), refMat4=dm5.col(c1)*refMat2.row(r)); + + VERIFY_IS_APPROX( m4=dm5.row(r1).transpose()*m2.row(r), refMat4=dm5.row(r1).transpose()*refMat2.row(r)); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.row(r1).transpose()*m2.row(r), refMat4=dm5.row(r1).transpose()*refMat2.row(r)); + } + + VERIFY_IS_APPROX(m6=m6*m6, refMat6=refMat6*refMat6); + + // sparse matrix * sparse vector + ColSpVector cv0(cols), cv1; + DenseVector dcv0(cols), dcv1; + initSparse(2*density,dcv0, cv0); + + RowSpVector rv0(depth), rv1; + RowDenseVector drv0(depth), drv1(rv1); + initSparse(2*density,drv0, rv0); + + VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0); + VERIFY_IS_APPROX(rv1=rv0*m3, drv1=drv0*refMat3); + VERIFY_IS_APPROX(cv1=m3t.adjoint()*cv0, dcv1=refMat3t.adjoint()*dcv0); + VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3); + VERIFY_IS_APPROX(rv1=m3*cv0, drv1=refMat3*dcv0); + } + + // test matrix - diagonal product + { + DenseMatrix refM2 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM3 = DenseMatrix::Zero(rows, cols); + DenseMatrix d3 = DenseMatrix::Zero(rows, cols); + DiagonalMatrix d1(DenseVector::Random(cols)); + DiagonalMatrix d2(DenseVector::Random(rows)); + SparseMatrixType m2(rows, cols); + SparseMatrixType m3(rows, cols); + initSparse(density, refM2, m2); + initSparse(density, refM3, m3); + VERIFY_IS_APPROX(m3=m2*d1, refM3=refM2*d1); + VERIFY_IS_APPROX(m3=m2.transpose()*d2, refM3=refM2.transpose()*d2); + VERIFY_IS_APPROX(m3=d2*m2, refM3=d2*refM2); + VERIFY_IS_APPROX(m3=d1*m2.transpose(), refM3=d1*refM2.transpose()); + + // also check with a SparseWrapper: + DenseVector v1 = 
DenseVector::Random(cols); + DenseVector v2 = DenseVector::Random(rows); + VERIFY_IS_APPROX(m3=m2*v1.asDiagonal(), refM3=refM2*v1.asDiagonal()); + VERIFY_IS_APPROX(m3=m2.transpose()*v2.asDiagonal(), refM3=refM2.transpose()*v2.asDiagonal()); + VERIFY_IS_APPROX(m3=v2.asDiagonal()*m2, refM3=v2.asDiagonal()*refM2); + VERIFY_IS_APPROX(m3=v1.asDiagonal()*m2.transpose(), refM3=v1.asDiagonal()*refM2.transpose()); + + VERIFY_IS_APPROX(m3=v2.asDiagonal()*m2*v1.asDiagonal(), refM3=v2.asDiagonal()*refM2*v1.asDiagonal()); + + // evaluate to a dense matrix to check the .row() and .col() iterator functions + VERIFY_IS_APPROX(d3=m2*d1, refM3=refM2*d1); + VERIFY_IS_APPROX(d3=m2.transpose()*d2, refM3=refM2.transpose()*d2); + VERIFY_IS_APPROX(d3=d2*m2, refM3=d2*refM2); + VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose()); + } + + // test self-adjoint and triangular-view products + { + DenseMatrix b = DenseMatrix::Random(rows, rows); + DenseMatrix x = DenseMatrix::Random(rows, rows); + DenseMatrix refX = DenseMatrix::Random(rows, rows); + DenseMatrix refUp = DenseMatrix::Zero(rows, rows); + DenseMatrix refLo = DenseMatrix::Zero(rows, rows); + DenseMatrix refS = DenseMatrix::Zero(rows, rows); + DenseMatrix refA = DenseMatrix::Zero(rows, rows); + SparseMatrixType mUp(rows, rows); + SparseMatrixType mLo(rows, rows); + SparseMatrixType mS(rows, rows); + SparseMatrixType mA(rows, rows); + initSparse(density, refA, mA); + do { + initSparse(density, refUp, mUp, ForceRealDiag|/*ForceNonZeroDiag|*/MakeUpperTriangular); + } while (refUp.isZero()); + refLo = refUp.adjoint(); + mLo = mUp.adjoint(); + refS = refUp + refLo; + refS.diagonal() *= 0.5; + mS = mUp + mLo; + // TODO be able to address the diagonal.... 
+ for (int k=0; k()*b, refX=refS*b); + VERIFY_IS_APPROX(x=mLo.template selfadjointView()*b, refX=refS*b); + VERIFY_IS_APPROX(x=mS.template selfadjointView()*b, refX=refS*b); + + VERIFY_IS_APPROX(x.noalias()+=mUp.template selfadjointView()*b, refX+=refS*b); + VERIFY_IS_APPROX(x.noalias()-=mLo.template selfadjointView()*b, refX-=refS*b); + VERIFY_IS_APPROX(x.noalias()+=mS.template selfadjointView()*b, refX+=refS*b); + + // sparse selfadjointView with sparse matrices + SparseMatrixType mSres(rows,rows); + VERIFY_IS_APPROX(mSres = mLo.template selfadjointView()*mS, + refX = refLo.template selfadjointView()*refS); + VERIFY_IS_APPROX(mSres = mS * mLo.template selfadjointView(), + refX = refS * refLo.template selfadjointView()); + + // sparse triangularView with dense matrices + VERIFY_IS_APPROX(x=mA.template triangularView()*b, refX=refA.template triangularView()*b); + VERIFY_IS_APPROX(x=mA.template triangularView()*b, refX=refA.template triangularView()*b); + VERIFY_IS_APPROX(x=b*mA.template triangularView(), refX=b*refA.template triangularView()); + VERIFY_IS_APPROX(x=b*mA.template triangularView(), refX=b*refA.template triangularView()); + + // sparse triangularView with sparse matrices + VERIFY_IS_APPROX(mSres = mA.template triangularView()*mS, refX = refA.template triangularView()*refS); + VERIFY_IS_APPROX(mSres = mS * mA.template triangularView(), refX = refS * refA.template triangularView()); + VERIFY_IS_APPROX(mSres = mA.template triangularView()*mS, refX = refA.template triangularView()*refS); + VERIFY_IS_APPROX(mSres = mS * mA.template triangularView(), refX = refS * refA.template triangularView()); + } +} + +// New test for Bug in SparseTimeDenseProduct +template void sparse_product_regression_test() +{ + // This code does not compile with afflicted versions of the bug + SparseMatrixType sm1(3,2); + DenseMatrixType m2(2,2); + sm1.setZero(); + m2.setZero(); + + DenseMatrixType m3 = sm1*m2; + + + // This code produces a segfault with afflicted versions of 
another SparseTimeDenseProduct + // bug + + SparseMatrixType sm2(20000,2); + sm2.setZero(); + DenseMatrixType m4(sm2*m2); + + VERIFY_IS_APPROX( m4(0,0), 0.0 ); +} + +template +void bug_942() +{ + typedef Matrix Vector; + typedef SparseMatrix ColSpMat; + typedef SparseMatrix RowSpMat; + ColSpMat cmA(1,1); + cmA.insert(0,0) = 1; + + RowSpMat rmA(1,1); + rmA.insert(0,0) = 1; + + Vector d(1); + d[0] = 2; + + double res = 2; + + VERIFY_IS_APPROX( ( cmA*d.asDiagonal() ).eval().coeff(0,0), res ); + VERIFY_IS_APPROX( ( d.asDiagonal()*rmA ).eval().coeff(0,0), res ); + VERIFY_IS_APPROX( ( rmA*d.asDiagonal() ).eval().coeff(0,0), res ); + VERIFY_IS_APPROX( ( d.asDiagonal()*cmA ).eval().coeff(0,0), res ); +} + +void test_sparse_product() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( (sparse_product >()) ); + CALL_SUBTEST_1( (sparse_product >()) ); + CALL_SUBTEST_1( (bug_942()) ); + CALL_SUBTEST_2( (sparse_product, ColMajor > >()) ); + CALL_SUBTEST_2( (sparse_product, RowMajor > >()) ); + CALL_SUBTEST_3( (sparse_product >()) ); + CALL_SUBTEST_4( (sparse_product_regression_test, Matrix >()) ); + } +} diff --git a/thirdparty/eigen/test/sparse_ref.cpp b/thirdparty/eigen/test/sparse_ref.cpp new file mode 100644 index 000000000..5e9607234 --- /dev/null +++ b/thirdparty/eigen/test/sparse_ref.cpp @@ -0,0 +1,139 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// This unit test cannot be easily written to work with EIGEN_DEFAULT_TO_ROW_MAJOR +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif + +static long int nb_temporaries; + +inline void on_temporary_creation() { + // here's a great place to set a breakpoint when debugging failures in this test! 
+ nb_temporaries++; +} + +#define EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN { on_temporary_creation(); } + +#include "main.h" +#include + +#define VERIFY_EVALUATION_COUNT(XPR,N) {\ + nb_temporaries = 0; \ + CALL_SUBTEST( XPR ); \ + if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ + VERIFY( (#XPR) && nb_temporaries==N ); \ + } + +template void check_const_correctness(const PlainObjectType&) +{ + // verify that ref-to-const don't have LvalueBit + typedef typename internal::add_const::type ConstPlainObjectType; + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(Ref::Flags & LvalueBit) ); + VERIFY( !(Ref::Flags & LvalueBit) ); +} + +template +EIGEN_DONT_INLINE void call_ref_1(Ref > a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_2(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_3(const Ref, StandardCompressedFormat>& a, const B &b) { + VERIFY(a.isCompressed()); + VERIFY_IS_EQUAL(a.toDense(),b.toDense()); +} + +template +EIGEN_DONT_INLINE void call_ref_4(Ref > a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_5(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +void call_ref() +{ + SparseMatrix A = MatrixXf::Random(10,10).sparseView(0.5,1); + SparseMatrix B = MatrixXf::Random(10,10).sparseView(0.5,1); + SparseMatrix C = MatrixXf::Random(10,10).sparseView(0.5,1); + C.reserve(VectorXi::Constant(C.outerSize(), 2)); + const SparseMatrix& Ac(A); + Block > Ab(A,0,1, 3,3); + const Block > Abc(A,0,1,3,3); + SparseVector vc = VectorXf::Random(10).sparseView(0.5,1); + SparseVector vr = VectorXf::Random(10).sparseView(0.5,1); + SparseMatrix AA = A*A; + + + VERIFY_EVALUATION_COUNT( call_ref_1(A, A), 0); +// VERIFY_EVALUATION_COUNT( call_ref_1(Ac, Ac), 0); // does not compile 
on purpose + VERIFY_EVALUATION_COUNT( call_ref_2(A, A), 0); + VERIFY_EVALUATION_COUNT( call_ref_3(A, A), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(A.transpose(), A.transpose()), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(A.transpose(), A.transpose()), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(Ac,Ac), 0); + VERIFY_EVALUATION_COUNT( call_ref_3(Ac,Ac), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(A+A,2*Ac), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(A+A,2*Ac), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(B, B), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(B, B), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(B.transpose(), B.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_3(B.transpose(), B.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(A*A, AA), 3); + VERIFY_EVALUATION_COUNT( call_ref_3(A*A, AA), 3); + + VERIFY(!C.isCompressed()); + VERIFY_EVALUATION_COUNT( call_ref_3(C, C), 1); + + Ref > Ar(A); + VERIFY_IS_APPROX(Ar+Ar, A+A); + VERIFY_EVALUATION_COUNT( call_ref_1(Ar, A), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(Ar, A), 0); + + Ref > Br(B); + VERIFY_EVALUATION_COUNT( call_ref_1(Br.transpose(), Br.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(Br, Br), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(Br.transpose(), Br.transpose()), 0); + + Ref > Arc(A); +// VERIFY_EVALUATION_COUNT( call_ref_1(Arc, Arc), 0); // does not compile on purpose + VERIFY_EVALUATION_COUNT( call_ref_2(Arc, Arc), 0); + + VERIFY_EVALUATION_COUNT( call_ref_2(A.middleCols(1,3), A.middleCols(1,3)), 0); + + VERIFY_EVALUATION_COUNT( call_ref_2(A.col(2), A.col(2)), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vr.transpose(), vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vr, vr.transpose()), 0); + + VERIFY_EVALUATION_COUNT( call_ref_2(A.block(1,1,3,3), A.block(1,1,3,3)), 1); // should be 0 (allocate starts/nnz only) + + VERIFY_EVALUATION_COUNT( call_ref_4(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_4(vr, vr.transpose()), 0); + 
VERIFY_EVALUATION_COUNT( call_ref_5(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(vr, vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_4(A.col(2), A.col(2)), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(A.col(2), A.col(2)), 0); + // VERIFY_EVALUATION_COUNT( call_ref_4(A.row(2), A.row(2).transpose()), 1); // does not compile on purpose + VERIFY_EVALUATION_COUNT( call_ref_5(A.row(2), A.row(2).transpose()), 1); +} + +void test_sparse_ref() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( check_const_correctness(SparseMatrix()) ); + CALL_SUBTEST_1( check_const_correctness(SparseMatrix()) ); + CALL_SUBTEST_2( call_ref() ); + + CALL_SUBTEST_3( check_const_correctness(SparseVector()) ); + CALL_SUBTEST_3( check_const_correctness(SparseVector()) ); + } +} diff --git a/thirdparty/eigen/test/sparse_solver.h b/thirdparty/eigen/test/sparse_solver.h new file mode 100644 index 000000000..5145bc3eb --- /dev/null +++ b/thirdparty/eigen/test/sparse_solver.h @@ -0,0 +1,565 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "sparse.h" +#include +#include + +template +void solve_with_guess(IterativeSolverBase& solver, const MatrixBase& b, const Guess& g, Result &x) { + if(internal::random()) + { + // With a temporary through evaluator + x = solver.derived().solveWithGuess(b,g) + Result::Zero(x.rows(), x.cols()); + } + else + { + // direct evaluation within x through Assignment + x = solver.derived().solveWithGuess(b.derived(),g); + } +} + +template +void solve_with_guess(SparseSolverBase& solver, const MatrixBase& b, const Guess& , Result& x) { + if(internal::random()) + x = solver.derived().solve(b) + Result::Zero(x.rows(), x.cols()); + else + x = solver.derived().solve(b); +} + +template +void solve_with_guess(SparseSolverBase& solver, const SparseMatrixBase& b, const Guess& , Result& x) { + x = solver.derived().solve(b); +} + +template +void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const DenseMat& dA, const DenseRhs& db) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef typename Mat::StorageIndex StorageIndex; + + DenseRhs refX = dA.householderQr().solve(db); + { + Rhs x(A.cols(), b.cols()); + Rhs oldb = b; + + solver.compute(A); + if (solver.info() != Success) + { + std::cerr << "ERROR | sparse solver testing, factorization failed (" << typeid(Solver).name() << ")\n"; + VERIFY(solver.info() == Success); + } + x = solver.solve(b); + if (solver.info() != Success) + { + std::cerr << "WARNING | sparse solver testing: solving failed (" << typeid(Solver).name() << ")\n"; + return; + } + VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); + + x.setZero(); + solve_with_guess(solver, b, x, x); + VERIFY(solver.info() == Success && "solving failed when using analyzePattern/factorize API"); + VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); + 
VERIFY(x.isApprox(refX,test_precision())); + + x.setZero(); + // test the analyze/factorize API + solver.analyzePattern(A); + solver.factorize(A); + VERIFY(solver.info() == Success && "factorization failed when using analyzePattern/factorize API"); + x = solver.solve(b); + VERIFY(solver.info() == Success && "solving failed when using analyzePattern/factorize API"); + VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); + + x.setZero(); + // test with Map + MappedSparseMatrix Am(A.rows(), A.cols(), A.nonZeros(), const_cast(A.outerIndexPtr()), const_cast(A.innerIndexPtr()), const_cast(A.valuePtr())); + solver.compute(Am); + VERIFY(solver.info() == Success && "factorization failed when using Map"); + DenseRhs dx(refX); + dx.setZero(); + Map xm(dx.data(), dx.rows(), dx.cols()); + Map bm(db.data(), db.rows(), db.cols()); + xm = solver.solve(bm); + VERIFY(solver.info() == Success && "solving failed when using Map"); + VERIFY(oldb.isApprox(bm) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(xm.isApprox(refX,test_precision())); + } + + // if not too large, do some extra check: + if(A.rows()<2000) + { + // test initialization ctor + { + Rhs x(b.rows(), b.cols()); + Solver solver2(A); + VERIFY(solver2.info() == Success); + x = solver2.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test dense Block as the result and rhs: + { + DenseRhs x(refX.rows(), refX.cols()); + DenseRhs oldb(db); + x.setZero(); + x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols())); + VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test uncompressed inputs + { + Mat A2 = A; + A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast().eval()); + solver.compute(A2); + Rhs x = solver.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test expression 
as input + { + solver.compute(0.5*(A+A)); + Rhs x = solver.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + + Solver solver2(0.5*(A+A)); + Rhs x2 = solver2.solve(b); + VERIFY(x2.isApprox(refX,test_precision())); + } + } +} + +template +void check_sparse_solving_real_cases(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const typename Solver::MatrixType& fullA, const Rhs& refX) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef typename Mat::RealScalar RealScalar; + + Rhs x(A.cols(), b.cols()); + + solver.compute(A); + if (solver.info() != Success) + { + std::cerr << "ERROR | sparse solver testing, factorization failed (" << typeid(Solver).name() << ")\n"; + VERIFY(solver.info() == Success); + } + x = solver.solve(b); + + if (solver.info() != Success) + { + std::cerr << "WARNING | sparse solver testing, solving failed (" << typeid(Solver).name() << ")\n"; + return; + } + + RealScalar res_error = (fullA*x-b).norm()/b.norm(); + VERIFY( (res_error <= test_precision() ) && "sparse solver failed without noticing it"); + + + if(refX.size() != 0 && (refX - x).norm()/refX.norm() > test_precision()) + { + std::cerr << "WARNING | found solution is different from the provided reference one\n"; + } + +} +template +void check_sparse_determinant(Solver& solver, const typename Solver::MatrixType& A, const DenseMat& dA) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + + solver.compute(A); + if (solver.info() != Success) + { + std::cerr << "WARNING | sparse solver testing: factorization failed (check_sparse_determinant)\n"; + return; + } + + Scalar refDet = dA.determinant(); + VERIFY_IS_APPROX(refDet,solver.determinant()); +} +template +void check_sparse_abs_determinant(Solver& solver, const typename Solver::MatrixType& A, const DenseMat& dA) +{ + using std::abs; + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + + solver.compute(A); + if 
(solver.info() != Success) + { + std::cerr << "WARNING | sparse solver testing: factorization failed (check_sparse_abs_determinant)\n"; + return; + } + + Scalar refDet = abs(dA.determinant()); + VERIFY_IS_APPROX(refDet,solver.absDeterminant()); +} + +template +int generate_sparse_spd_problem(Solver& , typename Solver::MatrixType& A, typename Solver::MatrixType& halfA, DenseMat& dA, int maxSize = 300) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef Matrix DenseMatrix; + + int size = internal::random(1,maxSize); + double density = (std::max)(8./(size*size), 0.01); + + Mat M(size, size); + DenseMatrix dM(size, size); + + initSparse(density, dM, M, ForceNonZeroDiag); + + A = M * M.adjoint(); + dA = dM * dM.adjoint(); + + halfA.resize(size,size); + if(Solver::UpLo==(Lower|Upper)) + halfA = A; + else + halfA.template selfadjointView().rankUpdate(M); + + return size; +} + + +#ifdef TEST_REAL_CASES +template +inline std::string get_matrixfolder() +{ + std::string mat_folder = TEST_REAL_CASES; + if( internal::is_same >::value || internal::is_same >::value ) + mat_folder = mat_folder + static_cast("/complex/"); + else + mat_folder = mat_folder + static_cast("/real/"); + return mat_folder; +} +std::string sym_to_string(int sym) +{ + if(sym==Symmetric) return "Symmetric "; + if(sym==SPD) return "SPD "; + return ""; +} +template +std::string solver_stats(const IterativeSolverBase &solver) +{ + std::stringstream ss; + ss << solver.iterations() << " iters, error: " << solver.error(); + return ss.str(); +} +template +std::string solver_stats(const SparseSolverBase &/*solver*/) +{ + return ""; +} +#endif + +template void check_sparse_spd_solving(Solver& solver, int maxSize = 300, int maxRealWorldSize = 100000) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef typename Mat::StorageIndex StorageIndex; + typedef SparseMatrix SpMat; + typedef SparseVector SpVec; + typedef Matrix DenseMatrix; + 
typedef Matrix DenseVector; + + // generate the problem + Mat A, halfA; + DenseMatrix dA; + for (int i = 0; i < g_repeat; i++) { + int size = generate_sparse_spd_problem(solver, A, halfA, dA, maxSize); + + // generate the right hand sides + int rhsCols = internal::random(1,16); + double density = (std::max)(8./(size*rhsCols), 0.1); + SpMat B(size,rhsCols); + DenseVector b = DenseVector::Random(size); + DenseMatrix dB(size,rhsCols); + initSparse(density, dB, B, ForceNonZeroDiag); + SpVec c = B.col(0); + DenseVector dc = dB.col(0); + + CALL_SUBTEST( check_sparse_solving(solver, A, b, dA, b) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, b, dA, b) ); + CALL_SUBTEST( check_sparse_solving(solver, A, dB, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, dB, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, A, B, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, B, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, A, c, dA, dc) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, c, dA, dc) ); + + // check only once + if(i==0) + { + b = DenseVector::Zero(size); + check_sparse_solving(solver, A, b, dA, b); + } + } + + // First, get the folder +#ifdef TEST_REAL_CASES + // Test real problems with double precision only + if (internal::is_same::Real, double>::value) + { + std::string mat_folder = get_matrixfolder(); + MatrixMarketIterator it(mat_folder); + for (; it; ++it) + { + if (it.sym() == SPD){ + A = it.matrix(); + if(A.diagonal().size() <= maxRealWorldSize) + { + DenseVector b = it.rhs(); + DenseVector refX = it.refX(); + PermutationMatrix pnull; + halfA.resize(A.rows(), A.cols()); + if(Solver::UpLo == (Lower|Upper)) + halfA = A; + else + halfA.template selfadjointView() = A.template triangularView().twistedBy(pnull); + + std::cout << "INFO | Testing " << sym_to_string(it.sym()) << "sparse problem " << it.matname() + << " (" << A.rows() << "x" << A.cols() << ") using " << typeid(Solver).name() << "..." 
<< std::endl; + CALL_SUBTEST( check_sparse_solving_real_cases(solver, A, b, A, refX) ); + std::string stats = solver_stats(solver); + if(stats.size()>0) + std::cout << "INFO | " << stats << std::endl; + CALL_SUBTEST( check_sparse_solving_real_cases(solver, halfA, b, A, refX) ); + } + else + { + std::cout << "INFO | Skip sparse problem \"" << it.matname() << "\" (too large)" << std::endl; + } + } + } + } +#else + EIGEN_UNUSED_VARIABLE(maxRealWorldSize); +#endif +} + +template void check_sparse_spd_determinant(Solver& solver) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef Matrix DenseMatrix; + + // generate the problem + Mat A, halfA; + DenseMatrix dA; + generate_sparse_spd_problem(solver, A, halfA, dA, 30); + + for (int i = 0; i < g_repeat; i++) { + check_sparse_determinant(solver, A, dA); + check_sparse_determinant(solver, halfA, dA ); + } +} + +template +Index generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + + Index size = internal::random(1,maxSize); + double density = (std::max)(8./(size*size), 0.01); + + A.resize(size,size); + dA.resize(size,size); + + initSparse(density, dA, A, options); + + return size; +} + + +struct prune_column { + Index m_col; + prune_column(Index col) : m_col(col) {} + template + bool operator()(Index, Index col, const Scalar&) const { + return col != m_col; + } +}; + + +template void check_sparse_square_solving(Solver& solver, int maxSize = 300, int maxRealWorldSize = 100000, bool checkDeficient = false) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef SparseMatrix SpMat; + typedef SparseVector SpVec; + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + + int rhsCols = internal::random(1,16); + + Mat A; + DenseMatrix dA; + for (int i = 0; i < g_repeat; i++) { + 
Index size = generate_sparse_square_problem(solver, A, dA, maxSize); + + A.makeCompressed(); + DenseVector b = DenseVector::Random(size); + DenseMatrix dB(size,rhsCols); + SpMat B(size,rhsCols); + double density = (std::max)(8./(size*rhsCols), 0.1); + initSparse(density, dB, B, ForceNonZeroDiag); + B.makeCompressed(); + SpVec c = B.col(0); + DenseVector dc = dB.col(0); + CALL_SUBTEST(check_sparse_solving(solver, A, b, dA, b)); + CALL_SUBTEST(check_sparse_solving(solver, A, dB, dA, dB)); + CALL_SUBTEST(check_sparse_solving(solver, A, B, dA, dB)); + CALL_SUBTEST(check_sparse_solving(solver, A, c, dA, dc)); + + // check only once + if(i==0) + { + b = DenseVector::Zero(size); + check_sparse_solving(solver, A, b, dA, b); + } + // regression test for Bug 792 (structurally rank deficient matrices): + if(checkDeficient && size>1) { + Index col = internal::random(0,int(size-1)); + A.prune(prune_column(col)); + solver.compute(A); + VERIFY_IS_EQUAL(solver.info(), NumericalIssue); + } + } + + // First, get the folder +#ifdef TEST_REAL_CASES + // Test real problems with double precision only + if (internal::is_same::Real, double>::value) + { + std::string mat_folder = get_matrixfolder(); + MatrixMarketIterator it(mat_folder); + for (; it; ++it) + { + A = it.matrix(); + if(A.diagonal().size() <= maxRealWorldSize) + { + DenseVector b = it.rhs(); + DenseVector refX = it.refX(); + std::cout << "INFO | Testing " << sym_to_string(it.sym()) << "sparse problem " << it.matname() + << " (" << A.rows() << "x" << A.cols() << ") using " << typeid(Solver).name() << "..." 
<< std::endl; + CALL_SUBTEST(check_sparse_solving_real_cases(solver, A, b, A, refX)); + std::string stats = solver_stats(solver); + if(stats.size()>0) + std::cout << "INFO | " << stats << std::endl; + } + else + { + std::cout << "INFO | SKIP sparse problem \"" << it.matname() << "\" (too large)" << std::endl; + } + } + } +#else + EIGEN_UNUSED_VARIABLE(maxRealWorldSize); +#endif + +} + +template void check_sparse_square_determinant(Solver& solver) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef Matrix DenseMatrix; + + for (int i = 0; i < g_repeat; i++) { + // generate the problem + Mat A; + DenseMatrix dA; + + int size = internal::random(1,30); + dA.setRandom(size,size); + + dA = (dA.array().abs()<0.3).select(0,dA); + dA.diagonal() = (dA.diagonal().array()==0).select(1,dA.diagonal()); + A = dA.sparseView(); + A.makeCompressed(); + + check_sparse_determinant(solver, A, dA); + } +} + +template void check_sparse_square_abs_determinant(Solver& solver) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef Matrix DenseMatrix; + + for (int i = 0; i < g_repeat; i++) { + // generate the problem + Mat A; + DenseMatrix dA; + generate_sparse_square_problem(solver, A, dA, 30); + A.makeCompressed(); + check_sparse_abs_determinant(solver, A, dA); + } +} + +template +void generate_sparse_leastsquare_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + + int rows = internal::random(1,maxSize); + int cols = internal::random(1,rows); + double density = (std::max)(8./(rows*cols), 0.01); + + A.resize(rows,cols); + dA.resize(rows,cols); + + initSparse(density, dA, A, options); +} + +template void check_sparse_leastsquare_solving(Solver& solver) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef SparseMatrix 
SpMat; + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + + int rhsCols = internal::random(1,16); + + Mat A; + DenseMatrix dA; + for (int i = 0; i < g_repeat; i++) { + generate_sparse_leastsquare_problem(solver, A, dA); + + A.makeCompressed(); + DenseVector b = DenseVector::Random(A.rows()); + DenseMatrix dB(A.rows(),rhsCols); + SpMat B(A.rows(),rhsCols); + double density = (std::max)(8./(A.rows()*rhsCols), 0.1); + initSparse(density, dB, B, ForceNonZeroDiag); + B.makeCompressed(); + check_sparse_solving(solver, A, b, dA, b); + check_sparse_solving(solver, A, dB, dA, dB); + check_sparse_solving(solver, A, B, dA, dB); + + // check only once + if(i==0) + { + b = DenseVector::Zero(A.rows()); + check_sparse_solving(solver, A, b, dA, b); + } + } +} diff --git a/thirdparty/eigen/test/sparse_solvers.cpp b/thirdparty/eigen/test/sparse_solvers.cpp new file mode 100644 index 000000000..3a8873d43 --- /dev/null +++ b/thirdparty/eigen/test/sparse_solvers.cpp @@ -0,0 +1,112 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "sparse.h" + +template void +initSPD(double density, + Matrix& refMat, + SparseMatrix& sparseMat) +{ + Matrix aux(refMat.rows(),refMat.cols()); + initSparse(density,refMat,sparseMat); + refMat = refMat * refMat.adjoint(); + for (int k=0; k<2; ++k) + { + initSparse(density,aux,sparseMat,ForceNonZeroDiag); + refMat += aux * aux.adjoint(); + } + sparseMat.setZero(); + for (int j=0 ; j void sparse_solvers(int rows, int cols) +{ + double density = (std::max)(8./(rows*cols), 0.01); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + // Scalar eps = 1e-6; + + DenseVector vec1 = DenseVector::Random(rows); + + std::vector zeroCoords; + std::vector nonzeroCoords; + + // test triangular solver + { + DenseVector vec2 = vec1, vec3 = vec1; + SparseMatrix m2(rows, cols); + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + + // lower - dense + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords); + VERIFY_IS_APPROX(refMat2.template triangularView().solve(vec2), + m2.template triangularView().solve(vec3)); + + // upper - dense + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords); + VERIFY_IS_APPROX(refMat2.template triangularView().solve(vec2), + m2.template triangularView().solve(vec3)); + VERIFY_IS_APPROX(refMat2.conjugate().template triangularView().solve(vec2), + m2.conjugate().template triangularView().solve(vec3)); + { + SparseMatrix cm2(m2); + //Index rows, Index cols, Index nnz, Index* outerIndexPtr, Index* innerIndexPtr, Scalar* valuePtr + MappedSparseMatrix mm2(rows, cols, cm2.nonZeros(), cm2.outerIndexPtr(), cm2.innerIndexPtr(), cm2.valuePtr()); + VERIFY_IS_APPROX(refMat2.conjugate().template triangularView().solve(vec2), + mm2.conjugate().template triangularView().solve(vec3)); + } + + // lower - transpose + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords); + 
VERIFY_IS_APPROX(refMat2.transpose().template triangularView().solve(vec2), + m2.transpose().template triangularView().solve(vec3)); + + // upper - transpose + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords); + VERIFY_IS_APPROX(refMat2.transpose().template triangularView().solve(vec2), + m2.transpose().template triangularView().solve(vec3)); + + SparseMatrix matB(rows, rows); + DenseMatrix refMatB = DenseMatrix::Zero(rows, rows); + + // lower - sparse + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular); + initSparse(density, refMatB, matB); + refMat2.template triangularView().solveInPlace(refMatB); + m2.template triangularView().solveInPlace(matB); + VERIFY_IS_APPROX(matB.toDense(), refMatB); + + // upper - sparse + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular); + initSparse(density, refMatB, matB); + refMat2.template triangularView().solveInPlace(refMatB); + m2.template triangularView().solveInPlace(matB); + VERIFY_IS_APPROX(matB, refMatB); + + // test deprecated API + initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords); + VERIFY_IS_APPROX(refMat2.template triangularView().solve(vec2), + m2.template triangularView().solve(vec3)); + } +} + +void test_sparse_solvers() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(sparse_solvers(8, 8) ); + int s = internal::random(1,300); + CALL_SUBTEST_2(sparse_solvers >(s,s) ); + CALL_SUBTEST_1(sparse_solvers(s,s) ); + } +} diff --git a/thirdparty/eigen/test/sparse_vector.cpp b/thirdparty/eigen/test/sparse_vector.cpp new file mode 100644 index 000000000..b3e1dda25 --- /dev/null +++ b/thirdparty/eigen/test/sparse_vector.cpp @@ -0,0 +1,163 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse.h" + +template void sparse_vector(int rows, int cols) +{ + double densityMat = (std::max)(8./(rows*cols), 0.01); + double densityVec = (std::max)(8./(rows), 0.1); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + typedef SparseVector SparseVectorType; + typedef SparseMatrix SparseMatrixType; + Scalar eps = 1e-6; + + SparseMatrixType m1(rows,rows); + SparseVectorType v1(rows), v2(rows), v3(rows); + DenseMatrix refM1 = DenseMatrix::Zero(rows, rows); + DenseVector refV1 = DenseVector::Random(rows), + refV2 = DenseVector::Random(rows), + refV3 = DenseVector::Random(rows); + + std::vector zerocoords, nonzerocoords; + initSparse(densityVec, refV1, v1, &zerocoords, &nonzerocoords); + initSparse(densityMat, refM1, m1); + + initSparse(densityVec, refV2, v2); + initSparse(densityVec, refV3, v3); + + Scalar s1 = internal::random(); + + // test coeff and coeffRef + for (unsigned int i=0; i(0,rows-1); + Scalar v = internal::random(); + v4.coeffRef(i) += v; + v5.coeffRef(i) += v; + } + VERIFY_IS_APPROX(v4,v5); + } + + v1.coeffRef(nonzerocoords[0]) = Scalar(5); + refV1.coeffRef(nonzerocoords[0]) = Scalar(5); + VERIFY_IS_APPROX(v1, refV1); + + VERIFY_IS_APPROX(v1+v2, refV1+refV2); + VERIFY_IS_APPROX(v1+v2+v3, refV1+refV2+refV3); + + VERIFY_IS_APPROX(v1*s1-v2, refV1*s1-refV2); + + VERIFY_IS_APPROX(v1*=s1, refV1*=s1); + VERIFY_IS_APPROX(v1/=s1, refV1/=s1); + + VERIFY_IS_APPROX(v1+=v2, refV1+=refV2); + VERIFY_IS_APPROX(v1-=v2, refV1-=refV2); + + VERIFY_IS_APPROX(v1.dot(v2), refV1.dot(refV2)); + VERIFY_IS_APPROX(v1.dot(refV2), refV1.dot(refV2)); + + VERIFY_IS_APPROX(m1*v2, refM1*refV2); + VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2)); + { + int i = internal::random(0,rows-1); + VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i))); + } + + + VERIFY_IS_APPROX(v1.squaredNorm(), refV1.squaredNorm()); + + 
VERIFY_IS_APPROX(v1.blueNorm(), refV1.blueNorm()); + + // test aliasing + VERIFY_IS_APPROX((v1 = -v1), (refV1 = -refV1)); + VERIFY_IS_APPROX((v1 = v1.transpose()), (refV1 = refV1.transpose().eval())); + VERIFY_IS_APPROX((v1 += -v1), (refV1 += -refV1)); + + // sparse matrix to sparse vector + SparseMatrixType mv1; + VERIFY_IS_APPROX((mv1=v1),v1); + VERIFY_IS_APPROX(mv1,(v1=mv1)); + VERIFY_IS_APPROX(mv1,(v1=mv1.transpose())); + + // check copy to dense vector with transpose + refV3.resize(0); + VERIFY_IS_APPROX(refV3 = v1.transpose(),v1.toDense()); + VERIFY_IS_APPROX(DenseVector(v1),v1.toDense()); + + // test conservative resize + { + std::vector inc; + if(rows > 3) + inc.push_back(-3); + inc.push_back(0); + inc.push_back(3); + inc.push_back(1); + inc.push_back(10); + + for(std::size_t i = 0; i< inc.size(); i++) { + StorageIndex incRows = inc[i]; + SparseVectorType vec1(rows); + DenseVector refVec1 = DenseVector::Zero(rows); + initSparse(densityVec, refVec1, vec1); + + vec1.conservativeResize(rows+incRows); + refVec1.conservativeResize(rows+incRows); + if (incRows > 0) refVec1.tail(incRows).setZero(); + + VERIFY_IS_APPROX(vec1, refVec1); + + // Insert new values + if (incRows > 0) + vec1.insert(vec1.rows()-1) = refVec1(refVec1.rows()-1) = 1; + + VERIFY_IS_APPROX(vec1, refVec1); + } + } + +} + +void test_sparse_vector() +{ + for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,500), c = Eigen::internal::random(1,500); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + + CALL_SUBTEST_1(( sparse_vector(8, 8) )); + CALL_SUBTEST_2(( sparse_vector, int>(r, c) )); + CALL_SUBTEST_1(( sparse_vector(r, c) )); + CALL_SUBTEST_1(( sparse_vector(r, c) )); + } +} + diff --git a/thirdparty/eigen/test/sparselu.cpp b/thirdparty/eigen/test/sparselu.cpp new file mode 100644 index 000000000..bd000baf1 --- /dev/null +++ b/thirdparty/eigen/test/sparselu.cpp @@ -0,0 +1,45 @@ +// This file is part 
of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// SparseLU solve does not accept column major matrices for the destination. +// However, as expected, the generic check_sparse_square_solving routines produces row-major +// rhs and destination matrices when compiled with EIGEN_DEFAULT_TO_ROW_MAJOR + +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif + +#include "sparse_solver.h" +#include +#include + +template void test_sparselu_T() +{ + SparseLU /*, COLAMDOrdering*/ > sparselu_colamd; // COLAMDOrdering is the default + SparseLU, AMDOrdering > sparselu_amd; + SparseLU, NaturalOrdering > sparselu_natural; + + check_sparse_square_solving(sparselu_colamd, 300, 100000, true); + check_sparse_square_solving(sparselu_amd, 300, 10000, true); + check_sparse_square_solving(sparselu_natural, 300, 2000, true); + + check_sparse_square_abs_determinant(sparselu_colamd); + check_sparse_square_abs_determinant(sparselu_amd); + + check_sparse_square_determinant(sparselu_colamd); + check_sparse_square_determinant(sparselu_amd); +} + +void test_sparselu() +{ + CALL_SUBTEST_1(test_sparselu_T()); + CALL_SUBTEST_2(test_sparselu_T()); + CALL_SUBTEST_3(test_sparselu_T >()); + CALL_SUBTEST_4(test_sparselu_T >()); +} diff --git a/thirdparty/eigen/test/sparseqr.cpp b/thirdparty/eigen/test/sparseqr.cpp new file mode 100644 index 000000000..e8605fd21 --- /dev/null +++ b/thirdparty/eigen/test/sparseqr.cpp @@ -0,0 +1,106 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa Wakam +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +#include "sparse.h" +#include + +template +int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300, int maxCols = 150) +{ + eigen_assert(maxRows >= maxCols); + typedef typename MatrixType::Scalar Scalar; + int rows = internal::random(1,maxRows); + int cols = internal::random(1,maxCols); + double density = (std::max)(8./(rows*cols), 0.01); + + A.resize(rows,cols); + dA.resize(rows,cols); + initSparse(density, dA, A,ForceNonZeroDiag); + A.makeCompressed(); + int nop = internal::random(0, internal::random(0,1) > 0.5 ? cols/2 : 0); + for(int k=0; k(0,cols-1); + int j1 = internal::random(0,cols-1); + Scalar s = internal::random(); + A.col(j0) = s * A.col(j1); + dA.col(j0) = s * dA.col(j1); + } + +// if(rows void test_sparseqr_scalar() +{ + typedef SparseMatrix MatrixType; + typedef Matrix DenseMat; + typedef Matrix DenseVector; + MatrixType A; + DenseMat dA; + DenseVector refX,x,b; + SparseQR > solver; + generate_sparse_rectangular_problem(A,dA); + + b = dA * DenseVector::Random(A.cols()); + solver.compute(A); + if(internal::random(0,1)>0.5f) + solver.factorize(A); // this checks that calling analyzePattern is not needed if the pattern do not change. 
+ if (solver.info() != Success) + { + std::cerr << "sparse QR factorization failed\n"; + exit(0); + return; + } + x = solver.solve(b); + if (solver.info() != Success) + { + std::cerr << "sparse QR factorization failed\n"; + exit(0); + return; + } + + VERIFY_IS_APPROX(A * x, b); + + //Compare with a dense QR solver + ColPivHouseholderQR dqr(dA); + refX = dqr.solve(b); + + VERIFY_IS_EQUAL(dqr.rank(), solver.rank()); + if(solver.rank()==A.cols()) // full rank + VERIFY_IS_APPROX(x, refX); +// else +// VERIFY((dA * refX - b).norm() * 2 > (A * x - b).norm() ); + + // Compute explicitly the matrix Q + MatrixType Q, QtQ, idM; + Q = solver.matrixQ(); + //Check ||Q' * Q - I || + QtQ = Q * Q.adjoint(); + idM.resize(Q.rows(), Q.rows()); idM.setIdentity(); + VERIFY(idM.isApprox(QtQ)); + + // Q to dense + DenseMat dQ; + dQ = solver.matrixQ(); + VERIFY_IS_APPROX(Q, dQ); +} +void test_sparseqr() +{ + for(int i=0; i()); + CALL_SUBTEST_2(test_sparseqr_scalar >()); + } +} + diff --git a/thirdparty/eigen/test/special_numbers.cpp b/thirdparty/eigen/test/special_numbers.cpp new file mode 100644 index 000000000..2f1b704be --- /dev/null +++ b/thirdparty/eigen/test/special_numbers.cpp @@ -0,0 +1,58 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void special_numbers() +{ + typedef Matrix MatType; + int rows = internal::random(1,300); + int cols = internal::random(1,300); + + Scalar nan = std::numeric_limits::quiet_NaN(); + Scalar inf = std::numeric_limits::infinity(); + Scalar s1 = internal::random(); + + MatType m1 = MatType::Random(rows,cols), + mnan = MatType::Random(rows,cols), + minf = MatType::Random(rows,cols), + mboth = MatType::Random(rows,cols); + + int n = internal::random(1,10); + for(int k=0; k(0,rows-1), internal::random(0,cols-1)) = nan; + minf(internal::random(0,rows-1), internal::random(0,cols-1)) = inf; + } + mboth = mnan + minf; + + VERIFY(!m1.hasNaN()); + VERIFY(m1.allFinite()); + + VERIFY(mnan.hasNaN()); + VERIFY((s1*mnan).hasNaN()); + VERIFY(!minf.hasNaN()); + VERIFY(!(2*minf).hasNaN()); + VERIFY(mboth.hasNaN()); + VERIFY(mboth.array().hasNaN()); + + VERIFY(!mnan.allFinite()); + VERIFY(!minf.allFinite()); + VERIFY(!(minf-mboth).allFinite()); + VERIFY(!mboth.allFinite()); + VERIFY(!mboth.array().allFinite()); +} + +void test_special_numbers() +{ + for(int i = 0; i < 10*g_repeat; i++) { + CALL_SUBTEST_1( special_numbers() ); + CALL_SUBTEST_1( special_numbers() ); + } +} diff --git a/thirdparty/eigen/test/spqr_support.cpp b/thirdparty/eigen/test/spqr_support.cpp new file mode 100644 index 000000000..81e63b6a5 --- /dev/null +++ b/thirdparty/eigen/test/spqr_support.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed + +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#include "sparse.h" +#include + + +template +int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300, int maxCols = 300) +{ + eigen_assert(maxRows >= maxCols); + typedef typename MatrixType::Scalar Scalar; + int rows = internal::random(1,maxRows); + int cols = internal::random(1,rows); + double density = (std::max)(8./(rows*cols), 0.01); + + A.resize(rows,cols); + dA.resize(rows,cols); + initSparse(density, dA, A,ForceNonZeroDiag); + A.makeCompressed(); + return rows; +} + +template void test_spqr_scalar() +{ + typedef SparseMatrix MatrixType; + MatrixType A; + Matrix dA; + typedef Matrix DenseVector; + DenseVector refX,x,b; + SPQR solver; + generate_sparse_rectangular_problem(A,dA); + + Index m = A.rows(); + b = DenseVector::Random(m); + solver.compute(A); + if (solver.info() != Success) + { + std::cerr << "sparse QR factorization failed\n"; + exit(0); + return; + } + x = solver.solve(b); + if (solver.info() != Success) + { + std::cerr << "sparse QR factorization failed\n"; + exit(0); + return; + } + //Compare with a dense solver + refX = dA.colPivHouseholderQr().solve(b); + VERIFY(x.isApprox(refX,test_precision())); +} +void test_spqr_support() +{ + CALL_SUBTEST_1(test_spqr_scalar()); + CALL_SUBTEST_2(test_spqr_scalar >()); +} diff --git a/thirdparty/eigen/test/stable_norm.cpp b/thirdparty/eigen/test/stable_norm.cpp new file mode 100644 index 000000000..c3eb5ff31 --- /dev/null +++ b/thirdparty/eigen/test/stable_norm.cpp @@ -0,0 +1,192 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template EIGEN_DONT_INLINE T copy(const T& x) +{ + return x; +} + +template void stable_norm(const MatrixType& m) +{ + /* this test covers the following files: + StableNorm.h + */ + using std::sqrt; + using std::abs; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + bool complex_real_product_ok = true; + + // Check the basic machine-dependent constants. + { + int ibeta, it, iemin, iemax; + + ibeta = std::numeric_limits::radix; // base for floating-point numbers + it = std::numeric_limits::digits; // number of base-beta digits in mantissa + iemin = std::numeric_limits::min_exponent; // minimum exponent + iemax = std::numeric_limits::max_exponent; // maximum exponent + + VERIFY( (!(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2)) + && "the stable norm algorithm cannot be guaranteed on this computer"); + + Scalar inf = std::numeric_limits::infinity(); + if(NumTraits::IsComplex && (numext::isnan)(inf*RealScalar(1)) ) + { + complex_real_product_ok = false; + static bool first = true; + if(first) + std::cerr << "WARNING: compiler mess up complex*real product, " << inf << " * " << 1.0 << " = " << inf*RealScalar(1) << std::endl; + first = false; + } + } + + + Index rows = m.rows(); + Index cols = m.cols(); + + // get a non-zero random factor + Scalar factor = internal::random(); + while(numext::abs2(factor)(); + Scalar big = factor * ((std::numeric_limits::max)() * RealScalar(1e-4)); + + factor = internal::random(); + while(numext::abs2(factor)(); + Scalar small = factor * ((std::numeric_limits::min)() * RealScalar(1e4)); + + MatrixType vzero = MatrixType::Zero(rows, cols), + vrand = MatrixType::Random(rows, cols), + vbig(rows, cols), + vsmall(rows,cols); + + vbig.fill(big); + vsmall.fill(small); + + VERIFY_IS_MUCH_SMALLER_THAN(vzero.norm(), static_cast(1)); + VERIFY_IS_APPROX(vrand.stableNorm(), vrand.norm()); + 
VERIFY_IS_APPROX(vrand.blueNorm(), vrand.norm()); + VERIFY_IS_APPROX(vrand.hypotNorm(), vrand.norm()); + + RealScalar size = static_cast(m.size()); + + // test numext::isfinite + VERIFY(!(numext::isfinite)( std::numeric_limits::infinity())); + VERIFY(!(numext::isfinite)(sqrt(-abs(big)))); + + // test overflow + VERIFY((numext::isfinite)(sqrt(size)*abs(big))); + VERIFY_IS_NOT_APPROX(sqrt(copy(vbig.squaredNorm())), abs(sqrt(size)*big)); // here the default norm must fail + VERIFY_IS_APPROX(vbig.stableNorm(), sqrt(size)*abs(big)); + VERIFY_IS_APPROX(vbig.blueNorm(), sqrt(size)*abs(big)); + VERIFY_IS_APPROX(vbig.hypotNorm(), sqrt(size)*abs(big)); + + // test underflow + VERIFY((numext::isfinite)(sqrt(size)*abs(small))); + VERIFY_IS_NOT_APPROX(sqrt(copy(vsmall.squaredNorm())), abs(sqrt(size)*small)); // here the default norm must fail + VERIFY_IS_APPROX(vsmall.stableNorm(), sqrt(size)*abs(small)); + VERIFY_IS_APPROX(vsmall.blueNorm(), sqrt(size)*abs(small)); + VERIFY_IS_APPROX(vsmall.hypotNorm(), sqrt(size)*abs(small)); + + // Test compilation of cwise() version + VERIFY_IS_APPROX(vrand.colwise().stableNorm(), vrand.colwise().norm()); + VERIFY_IS_APPROX(vrand.colwise().blueNorm(), vrand.colwise().norm()); + VERIFY_IS_APPROX(vrand.colwise().hypotNorm(), vrand.colwise().norm()); + VERIFY_IS_APPROX(vrand.rowwise().stableNorm(), vrand.rowwise().norm()); + VERIFY_IS_APPROX(vrand.rowwise().blueNorm(), vrand.rowwise().norm()); + VERIFY_IS_APPROX(vrand.rowwise().hypotNorm(), vrand.rowwise().norm()); + + // test NaN, +inf, -inf + MatrixType v; + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + + // NaN + { + v = vrand; + v(i,j) = std::numeric_limits::quiet_NaN(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm())); + 
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm())); + } + + // +inf + { + v = vrand; + v(i,j) = std::numeric_limits::infinity(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); + if(complex_real_product_ok){ + VERIFY(isPlusInf(v.stableNorm())); + } + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm())); + } + + // -inf + { + v = vrand; + v(i,j) = -std::numeric_limits::infinity(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); + if(complex_real_product_ok) { + VERIFY(isPlusInf(v.stableNorm())); + } + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm())); + } + + // mix + { + Index i2 = internal::random(0,rows-1); + Index j2 = internal::random(0,cols-1); + v = vrand; + v(i,j) = -std::numeric_limits::infinity(); + v(i2,j2) = std::numeric_limits::quiet_NaN(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm())); + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm())); + } + + // stableNormalize[d] + { + VERIFY_IS_APPROX(vrand.stableNormalized(), vrand.normalized()); + MatrixType vcopy(vrand); + vcopy.stableNormalize(); + VERIFY_IS_APPROX(vcopy, 
vrand.normalized()); + VERIFY_IS_APPROX((vrand.stableNormalized()).norm(), RealScalar(1)); + VERIFY_IS_APPROX(vcopy.norm(), RealScalar(1)); + VERIFY_IS_APPROX((vbig.stableNormalized()).norm(), RealScalar(1)); + VERIFY_IS_APPROX((vsmall.stableNormalized()).norm(), RealScalar(1)); + RealScalar big_scaling = ((std::numeric_limits::max)() * RealScalar(1e-4)); + VERIFY_IS_APPROX(vbig/big_scaling, (vbig.stableNorm() * vbig.stableNormalized()).eval()/big_scaling); + VERIFY_IS_APPROX(vsmall, vsmall.stableNorm() * vsmall.stableNormalized()); + } +} + +void test_stable_norm() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( stable_norm(Matrix()) ); + CALL_SUBTEST_2( stable_norm(Vector4d()) ); + CALL_SUBTEST_3( stable_norm(VectorXd(internal::random(10,2000))) ); + CALL_SUBTEST_4( stable_norm(VectorXf(internal::random(10,2000))) ); + CALL_SUBTEST_5( stable_norm(VectorXcd(internal::random(10,2000))) ); + } +} diff --git a/thirdparty/eigen/test/stddeque.cpp b/thirdparty/eigen/test/stddeque.cpp new file mode 100644 index 000000000..bb4b476f3 --- /dev/null +++ b/thirdparty/eigen/test/stddeque.cpp @@ -0,0 +1,132 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2010 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include + +template +void check_stddeque_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + + Index rows = m.rows(); + Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + std::deque > v(10, MatrixType(rows,cols)), w(20, y); + v.front() = x; + w.front() = w.back(); + VERIFY_IS_APPROX(w.front(), w.back()); + v = w; + + typename std::deque >::iterator vi = v.begin(); + typename std::deque >::iterator wi = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*vi, *wi); + ++vi; + ++wi; + } + + v.resize(21); + v.back() = x; + VERIFY_IS_APPROX(v.back(), x); + v.resize(22,y); + VERIFY_IS_APPROX(v.back(), y); + v.push_back(x); + VERIFY_IS_APPROX(v.back(), x); +} + +template +void check_stddeque_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + std::deque > v(10), w(20, y); + v.front() = x; + w.front() = w.back(); + VERIFY_IS_APPROX(w.front(), w.back()); + v = w; + + typename std::deque >::iterator vi = v.begin(); + typename std::deque >::iterator wi = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*vi, *wi); + ++vi; + ++wi; + } + + v.resize(21); + v.back() = x; + VERIFY_IS_APPROX(v.back(), x); + v.resize(22,y); + VERIFY_IS_APPROX(v.back(), y); + v.push_back(x); + VERIFY_IS_APPROX(v.back(), x); +} + +template +void check_stddeque_quaternion(const QuaternionType&) +{ + typedef typename QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); + std::deque > v(10), w(20, y); + v.front() = x; + w.front() = w.back(); + VERIFY_IS_APPROX(w.front(), w.back()); + v = w; + + typename std::deque >::iterator vi = v.begin(); + typename std::deque >::iterator wi = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*vi, *wi); + ++vi; + ++wi; + } + + v.resize(21); + 
v.back() = x; + VERIFY_IS_APPROX(v.back(), x); + v.resize(22,y); + VERIFY_IS_APPROX(v.back(), y); + v.push_back(x); + VERIFY_IS_APPROX(v.back(), x); +} + +void test_stddeque() +{ + // some non vectorizable fixed sizes + CALL_SUBTEST_1(check_stddeque_matrix(Vector2f())); + CALL_SUBTEST_1(check_stddeque_matrix(Matrix3f())); + CALL_SUBTEST_2(check_stddeque_matrix(Matrix3d())); + + // some vectorizable fixed sizes + CALL_SUBTEST_1(check_stddeque_matrix(Matrix2f())); + CALL_SUBTEST_1(check_stddeque_matrix(Vector4f())); + CALL_SUBTEST_1(check_stddeque_matrix(Matrix4f())); + CALL_SUBTEST_2(check_stddeque_matrix(Matrix4d())); + + // some dynamic sizes + CALL_SUBTEST_3(check_stddeque_matrix(MatrixXd(1,1))); + CALL_SUBTEST_3(check_stddeque_matrix(VectorXd(20))); + CALL_SUBTEST_3(check_stddeque_matrix(RowVectorXf(20))); + CALL_SUBTEST_3(check_stddeque_matrix(MatrixXcf(10,10))); + + // some Transform + CALL_SUBTEST_4(check_stddeque_transform(Affine2f())); + CALL_SUBTEST_4(check_stddeque_transform(Affine3f())); + CALL_SUBTEST_4(check_stddeque_transform(Affine3d())); + + // some Quaternion + CALL_SUBTEST_5(check_stddeque_quaternion(Quaternionf())); + CALL_SUBTEST_5(check_stddeque_quaternion(Quaterniond())); +} diff --git a/thirdparty/eigen/test/stddeque_overload.cpp b/thirdparty/eigen/test/stddeque_overload.cpp new file mode 100644 index 000000000..4da618bbf --- /dev/null +++ b/thirdparty/eigen/test/stddeque_overload.cpp @@ -0,0 +1,158 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2010 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include +#include + +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Vector4f) + +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix2f) +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4f) +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Matrix4d) + +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3f) +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Affine3d) + +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaternionf) +EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(Quaterniond) + +template +void check_stddeque_matrix(const MatrixType& m) +{ + typename MatrixType::Index rows = m.rows(); + typename MatrixType::Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + std::deque v(10, MatrixType(rows,cols)), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + + // do a lot of push_back such that the deque gets internally resized + // (with memory reallocation) + MatrixType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +void check_stddeque_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + std::deque v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + + // do a lot of push_back such that the deque gets internally resized + // (with memory reallocation) + TransformType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); 
+ for(unsigned int i=23; i +void check_stddeque_quaternion(const QuaternionType&) +{ + typedef typename QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); + std::deque v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + + // do a lot of push_back such that the deque gets internally resized + // (with memory reallocation) + QuaternionType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +// Copyright (C) 2010 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include + +template +void check_stdlist_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + + Index rows = m.rows(); + Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + std::list > v(10, MatrixType(rows,cols)), w(20, y); + v.front() = x; + w.front() = w.back(); + VERIFY_IS_APPROX(w.front(), w.back()); + v = w; + + typename std::list >::iterator vi = v.begin(); + typename std::list >::iterator wi = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*vi, *wi); + ++vi; + ++wi; + } + + v.resize(21); + v.back() = x; + VERIFY_IS_APPROX(v.back(), x); + v.resize(22,y); + VERIFY_IS_APPROX(v.back(), y); + v.push_back(x); + VERIFY_IS_APPROX(v.back(), x); +} + +template +void check_stdlist_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + std::list > v(10), w(20, y); + v.front() = x; + w.front() = w.back(); + VERIFY_IS_APPROX(w.front(), w.back()); + v = w; + + typename std::list >::iterator vi = v.begin(); + typename std::list >::iterator wi = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*vi, *wi); + ++vi; + ++wi; + } + + v.resize(21); + v.back() = x; + VERIFY_IS_APPROX(v.back(), x); + v.resize(22,y); + VERIFY_IS_APPROX(v.back(), y); + v.push_back(x); + VERIFY_IS_APPROX(v.back(), x); +} + +template +void check_stdlist_quaternion(const QuaternionType&) +{ + typedef typename QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); + std::list > v(10), w(20, y); + v.front() = x; + w.front() = w.back(); + VERIFY_IS_APPROX(w.front(), w.back()); + v = w; + + typename std::list >::iterator vi = v.begin(); + typename std::list >::iterator wi = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*vi, *wi); + ++vi; + ++wi; + } + + v.resize(21); + v.back() = x; + 
VERIFY_IS_APPROX(v.back(), x); + v.resize(22,y); + VERIFY_IS_APPROX(v.back(), y); + v.push_back(x); + VERIFY_IS_APPROX(v.back(), x); +} + +void test_stdlist() +{ + // some non vectorizable fixed sizes + CALL_SUBTEST_1(check_stdlist_matrix(Vector2f())); + CALL_SUBTEST_1(check_stdlist_matrix(Matrix3f())); + CALL_SUBTEST_2(check_stdlist_matrix(Matrix3d())); + + // some vectorizable fixed sizes + CALL_SUBTEST_1(check_stdlist_matrix(Matrix2f())); + CALL_SUBTEST_1(check_stdlist_matrix(Vector4f())); + CALL_SUBTEST_1(check_stdlist_matrix(Matrix4f())); + CALL_SUBTEST_2(check_stdlist_matrix(Matrix4d())); + + // some dynamic sizes + CALL_SUBTEST_3(check_stdlist_matrix(MatrixXd(1,1))); + CALL_SUBTEST_3(check_stdlist_matrix(VectorXd(20))); + CALL_SUBTEST_3(check_stdlist_matrix(RowVectorXf(20))); + CALL_SUBTEST_3(check_stdlist_matrix(MatrixXcf(10,10))); + + // some Transform + CALL_SUBTEST_4(check_stdlist_transform(Affine2f())); + CALL_SUBTEST_4(check_stdlist_transform(Affine3f())); + CALL_SUBTEST_4(check_stdlist_transform(Affine3d())); + + // some Quaternion + CALL_SUBTEST_5(check_stdlist_quaternion(Quaternionf())); + CALL_SUBTEST_5(check_stdlist_quaternion(Quaterniond())); +} diff --git a/thirdparty/eigen/test/stdlist_overload.cpp b/thirdparty/eigen/test/stdlist_overload.cpp new file mode 100644 index 000000000..bb910bd43 --- /dev/null +++ b/thirdparty/eigen/test/stdlist_overload.cpp @@ -0,0 +1,192 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2010 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include +#include + +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Vector4f) + +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix2f) +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4f) +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Matrix4d) + +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3f) +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Affine3d) + +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaternionf) +EIGEN_DEFINE_STL_LIST_SPECIALIZATION(Quaterniond) + +template +typename Container::iterator get(Container & c, Position position) +{ + typename Container::iterator it = c.begin(); + std::advance(it, position); + return it; +} + +template +void set(Container & c, Position position, const Value & value) +{ + typename Container::iterator it = c.begin(); + std::advance(it, position); + *it = value; +} + +template +void check_stdlist_matrix(const MatrixType& m) +{ + typename MatrixType::Index rows = m.rows(); + typename MatrixType::Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + std::list v(10, MatrixType(rows,cols)), w(20, y); + typename std::list::iterator itv = get(v, 5); + typename std::list::iterator itw = get(w, 6); + *itv = x; + *itw = *itv; + VERIFY_IS_APPROX(*itw, *itv); + v = w; + itv = v.begin(); + itw = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*itw, *itv); + ++itv; + ++itw; + } + + v.resize(21); + set(v, 20, x); + VERIFY_IS_APPROX(*get(v, 20), x); + v.resize(22,y); + VERIFY_IS_APPROX(*get(v, 21), y); + v.push_back(x); + VERIFY_IS_APPROX(*get(v, 22), x); + + // do a lot of push_back such that the list gets internally resized + // (with memory reallocation) + MatrixType* ref = &(*get(w, 0)); + for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i) + v.push_back(*get(w, i%w.size())); + for(unsigned int i=23; i +void check_stdlist_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + 
std::list v(10), w(20, y); + typename std::list::iterator itv = get(v, 5); + typename std::list::iterator itw = get(w, 6); + *itv = x; + *itw = *itv; + VERIFY_IS_APPROX(*itw, *itv); + v = w; + itv = v.begin(); + itw = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*itw, *itv); + ++itv; + ++itw; + } + + v.resize(21); + set(v, 20, x); + VERIFY_IS_APPROX(*get(v, 20), x); + v.resize(22,y); + VERIFY_IS_APPROX(*get(v, 21), y); + v.push_back(x); + VERIFY_IS_APPROX(*get(v, 22), x); + + // do a lot of push_back such that the list gets internally resized + // (with memory reallocation) + TransformType* ref = &(*get(w, 0)); + for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i) + v.push_back(*get(w, i%w.size())); + for(unsigned int i=23; imatrix()==get(w, (i-23)%w.size())->matrix()); + } +} + +template +void check_stdlist_quaternion(const QuaternionType&) +{ + typedef typename QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); + std::list v(10), w(20, y); + typename std::list::iterator itv = get(v, 5); + typename std::list::iterator itw = get(w, 6); + *itv = x; + *itw = *itv; + VERIFY_IS_APPROX(*itw, *itv); + v = w; + itv = v.begin(); + itw = w.begin(); + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(*itw, *itv); + ++itv; + ++itw; + } + + v.resize(21); + set(v, 20, x); + VERIFY_IS_APPROX(*get(v, 20), x); + v.resize(22,y); + VERIFY_IS_APPROX(*get(v, 21), y); + v.push_back(x); + VERIFY_IS_APPROX(*get(v, 22), x); + + // do a lot of push_back such that the list gets internally resized + // (with memory reallocation) + QuaternionType* ref = &(*get(w, 0)); + for(int i=0; i<30 || ((ref==&(*get(w, 0))) && i<300); ++i) + v.push_back(*get(w, i%w.size())); + for(unsigned int i=23; icoeffs()==get(w, (i-23)%w.size())->coeffs()); + } +} + +void test_stdlist_overload() +{ + // some non vectorizable fixed sizes + CALL_SUBTEST_1(check_stdlist_matrix(Vector2f())); + 
CALL_SUBTEST_1(check_stdlist_matrix(Matrix3f())); + CALL_SUBTEST_2(check_stdlist_matrix(Matrix3d())); + + // some vectorizable fixed sizes + CALL_SUBTEST_1(check_stdlist_matrix(Matrix2f())); + CALL_SUBTEST_1(check_stdlist_matrix(Vector4f())); + CALL_SUBTEST_1(check_stdlist_matrix(Matrix4f())); + CALL_SUBTEST_2(check_stdlist_matrix(Matrix4d())); + + // some dynamic sizes + CALL_SUBTEST_3(check_stdlist_matrix(MatrixXd(1,1))); + CALL_SUBTEST_3(check_stdlist_matrix(VectorXd(20))); + CALL_SUBTEST_3(check_stdlist_matrix(RowVectorXf(20))); + CALL_SUBTEST_3(check_stdlist_matrix(MatrixXcf(10,10))); + + // some Transform + CALL_SUBTEST_4(check_stdlist_transform(Affine2f())); // does not need the specialization (2+1)^2 = 9 + CALL_SUBTEST_4(check_stdlist_transform(Affine3f())); + CALL_SUBTEST_4(check_stdlist_transform(Affine3d())); + + // some Quaternion + CALL_SUBTEST_5(check_stdlist_quaternion(Quaternionf())); + CALL_SUBTEST_5(check_stdlist_quaternion(Quaterniond())); +} diff --git a/thirdparty/eigen/test/stdvector.cpp b/thirdparty/eigen/test/stdvector.cpp new file mode 100644 index 000000000..50cb3341d --- /dev/null +++ b/thirdparty/eigen/test/stdvector.cpp @@ -0,0 +1,148 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" +#include +#include + +template +void check_stdvector_matrix(const MatrixType& m) +{ + typename MatrixType::Index rows = m.rows(); + typename MatrixType::Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + std::vector > v(10, MatrixType(rows,cols)), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + MatrixType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +void check_stdvector_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + std::vector > v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + TransformType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +void check_stdvector_quaternion(const QuaternionType&) +{ + typedef typename QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), 
y(Coefficients::Random()); + std::vector > v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + QuaternionType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +// Copyright (C) 2010 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include +#include + +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Vector4f) + +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Matrix2f) +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Matrix4f) +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Matrix4d) + +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Affine3f) +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Affine3d) + +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Quaternionf) +EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(Quaterniond) + +template +void check_stdvector_matrix(const MatrixType& m) +{ + typename MatrixType::Index rows = m.rows(); + typename MatrixType::Index cols = m.cols(); + MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); + std::vector v(10, MatrixType(rows,cols)), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + 
VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + MatrixType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +void check_stdvector_transform(const TransformType&) +{ + typedef typename TransformType::MatrixType MatrixType; + TransformType x(MatrixType::Random()), y(MatrixType::Random()); + std::vector v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + TransformType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +void check_stdvector_quaternion(const QuaternionType&) +{ + typedef typename QuaternionType::Coefficients Coefficients; + QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); + std::vector v(10), w(20, y); + v[5] = x; + w[6] = v[5]; + VERIFY_IS_APPROX(w[6], v[5]); + v = w; + for(int i = 0; i < 20; i++) + { + VERIFY_IS_APPROX(w[i], v[i]); + } + + v.resize(21); + v[20] = x; + VERIFY_IS_APPROX(v[20], x); + v.resize(22,y); + VERIFY_IS_APPROX(v[21], y); + v.push_back(x); + VERIFY_IS_APPROX(v[22], x); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType)); + + // do a lot of push_back such that the vector gets internally resized + // (with memory reallocation) + QuaternionType* ref = &w[0]; + for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) + 
v.push_back(w[i%w.size()]); + for(unsigned int i=23; i +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#include "sparse_solver.h" + +#include + +void test_superlu_support() +{ + SuperLU > superlu_double_colmajor; + SuperLU > > superlu_cplxdouble_colmajor; + CALL_SUBTEST_1( check_sparse_square_solving(superlu_double_colmajor) ); + CALL_SUBTEST_2( check_sparse_square_solving(superlu_cplxdouble_colmajor) ); + CALL_SUBTEST_1( check_sparse_square_determinant(superlu_double_colmajor) ); + CALL_SUBTEST_2( check_sparse_square_determinant(superlu_cplxdouble_colmajor) ); +} diff --git a/thirdparty/eigen/test/svd_common.h b/thirdparty/eigen/test/svd_common.h new file mode 100644 index 000000000..605d5dfef --- /dev/null +++ b/thirdparty/eigen/test/svd_common.h @@ -0,0 +1,483 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef SVD_DEFAULT +#error a macro SVD_DEFAULT(MatrixType) must be defined prior to including svd_common.h +#endif + +#ifndef SVD_FOR_MIN_NORM +#error a macro SVD_FOR_MIN_NORM(MatrixType) must be defined prior to including svd_common.h +#endif + +#include "svd_fill.h" + +// Check that the matrix m is properly reconstructed and that the U and V factors are unitary +// The SVD must have already been computed. 
+template +void svd_check_full(const MatrixType& m, const SvdType& svd) +{ + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix MatrixUType; + typedef Matrix MatrixVType; + + MatrixType sigma = MatrixType::Zero(rows,cols); + sigma.diagonal() = svd.singularValues().template cast(); + MatrixUType u = svd.matrixU(); + MatrixVType v = svd.matrixV(); + RealScalar scaling = m.cwiseAbs().maxCoeff(); + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(sigma.cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint()); + } + VERIFY_IS_UNITARY(u); + VERIFY_IS_UNITARY(v); +} + +// Compare partial SVD defined by computationOptions to a full SVD referenceSvd +template +void svd_compare_to_full(const MatrixType& m, + unsigned int computationOptions, + const SvdType& referenceSvd) +{ + typedef typename MatrixType::RealScalar RealScalar; + Index rows = m.rows(); + Index cols = m.cols(); + Index diagSize = (std::min)(rows, cols); + RealScalar prec = test_precision(); + + SvdType svd(m, computationOptions); + + VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues()); + + if(computationOptions & (ComputeFullV|ComputeThinV)) + { + VERIFY( (svd.matrixV().adjoint()*svd.matrixV()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint(), + referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).adjoint()); + } + + if(computationOptions & (ComputeFullU|ComputeThinU)) + { + VERIFY( (svd.matrixU().adjoint()*svd.matrixU()).isIdentity(prec) ); + 
VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).adjoint(), + referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).adjoint()); + } + + // The following checks are not critical. + // For instance, with Dived&Conquer SVD, if only the factor 'V' is computedt then different matrix-matrix product implementation will be used + // and the resulting 'V' factor might be significantly different when the SVD decomposition is not unique, especially with single precision float. + ++g_test_level; + if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU()); + if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize)); + if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV().cwiseAbs(), referenceSvd.matrixV().cwiseAbs()); + if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize)); + --g_test_level; +} + +// +template +void svd_least_square(const MatrixType& m, unsigned int computationOptions) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix RhsType; + typedef Matrix SolutionType; + + RhsType rhs = RhsType::Random(rows, internal::random(1, cols)); + SvdType svd(m, computationOptions); + + if(internal::is_same::value) svd.setThreshold(1e-8); + else if(internal::is_same::value) svd.setThreshold(2e-4); + + SolutionType x = svd.solve(rhs); + + RealScalar residual = (m*x-rhs).norm(); + RealScalar rhs_norm = rhs.norm(); + if(!test_isMuchSmallerThan(residual,rhs.norm())) + { + 
// ^^^ If the residual is very small, then we have an exact solution, so we are already good. + + // evaluate normal equation which works also for least-squares solutions + if(internal::is_same::value || svd.rank()==m.diagonal().size()) + { + using std::sqrt; + // This test is not stable with single precision. + // This is probably because squaring m signicantly affects the precision. + if(internal::is_same::value) ++g_test_level; + + VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs); + + if(internal::is_same::value) --g_test_level; + } + + // Check that there is no significantly better solution in the neighborhood of x + for(Index k=0;k::epsilon())*x.row(k); + RealScalar residual_y = (m*y-rhs).norm(); + VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); + if(internal::is_same::value) ++g_test_level; + VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); + if(internal::is_same::value) --g_test_level; + + y.row(k) = (RealScalar(1)-2*NumTraits::epsilon())*x.row(k); + residual_y = (m*y-rhs).norm(); + VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); + if(internal::is_same::value) ++g_test_level; + VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); + if(internal::is_same::value) --g_test_level; + } + } +} + +// check minimal norm solutions, the inoput matrix m is only used to recover problem size +template +void svd_min_norm(const MatrixType& m, unsigned int computationOptions) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + Index cols = m.cols(); + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix SolutionType; + + // generate a full-rank m x n problem with m MatrixType2; + typedef Matrix RhsType2; + typedef Matrix MatrixType2T; + Index rank = RankAtCompileTime2==Dynamic ? 
internal::random(1,cols) : Index(RankAtCompileTime2); + MatrixType2 m2(rank,cols); + int guard = 0; + do { + m2.setRandom(); + } while(SVD_FOR_MIN_NORM(MatrixType2)(m2).setThreshold(test_precision()).rank()!=rank && (++guard)<10); + VERIFY(guard<10); + + RhsType2 rhs2 = RhsType2::Random(rank); + // use QR to find a reference minimal norm solution + HouseholderQR qr(m2.adjoint()); + Matrix tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView().adjoint().solve(rhs2); + tmp.conservativeResize(cols); + tmp.tail(cols-rank).setZero(); + SolutionType x21 = qr.householderQ() * tmp; + // now check with SVD + SVD_FOR_MIN_NORM(MatrixType2) svd2(m2, computationOptions); + SolutionType x22 = svd2.solve(rhs2); + VERIFY_IS_APPROX(m2*x21, rhs2); + VERIFY_IS_APPROX(m2*x22, rhs2); + VERIFY_IS_APPROX(x21, x22); + + // Now check with a rank deficient matrix + typedef Matrix MatrixType3; + typedef Matrix RhsType3; + Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random(rank+1,2*cols) : Index(RowsAtCompileTime3); + Matrix C = Matrix::Random(rows3,rank); + MatrixType3 m3 = C * m2; + RhsType3 rhs3 = C * rhs2; + SVD_FOR_MIN_NORM(MatrixType3) svd3(m3, computationOptions); + SolutionType x3 = svd3.solve(rhs3); + VERIFY_IS_APPROX(m3*x3, rhs3); + VERIFY_IS_APPROX(m3*x21, rhs3); + VERIFY_IS_APPROX(m2*x3, rhs2); + VERIFY_IS_APPROX(x21, x3); +} + +// Check full, compare_to_full, least_square, and min_norm for all possible compute-options +template +void svd_test_all_computation_options(const MatrixType& m, bool full_only) +{ +// if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols()) +// return; + SvdType fullSvd(m, ComputeFullU|ComputeFullV); + CALL_SUBTEST(( svd_check_full(m, fullSvd) )); + CALL_SUBTEST(( svd_least_square(m, ComputeFullU | ComputeFullV) )); + CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeFullV) )); + + #if defined __INTEL_COMPILER + // remark #111: statement is unreachable + #pragma warning disable 111 + #endif + if(full_only) + return; 
+ + CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, 0, fullSvd) )); + + if (MatrixType::ColsAtCompileTime == Dynamic) { + // thin U/V are only available with dynamic number of columns + CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU , fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) )); + + CALL_SUBTEST(( svd_least_square(m, ComputeFullU | ComputeThinV) )); + CALL_SUBTEST(( svd_least_square(m, ComputeThinU | ComputeFullV) )); + CALL_SUBTEST(( svd_least_square(m, ComputeThinU | ComputeThinV) )); + + CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeThinV) )); + CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeFullV) )); + CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeThinV) )); + + // test reconstruction + typedef typename MatrixType::Index Index; + Index diagSize = (std::min)(m.rows(), m.cols()); + SvdType svd(m, ComputeThinU | ComputeThinV); + VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint()); + } +} + + +// work around stupid msvc error when constructing at compile time an expression that involves +// a division by zero, even if the numeric type has floating point +template +EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); } + +// workaround aggressive optimization in ICC +template EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; } + +// all this function does is verify we don't iterate infinitely on nan/inf values +template +void svd_inf_nan() +{ + SvdType svd; + typedef typename MatrixType::Scalar Scalar; + Scalar some_inf = Scalar(1) / zero(); + VERIFY(sub(some_inf, 
some_inf) != sub(some_inf, some_inf)); + svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV); + + Scalar nan = std::numeric_limits::quiet_NaN(); + VERIFY(nan != nan); + svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV); + + MatrixType m = MatrixType::Zero(10,10); + m(internal::random(0,9), internal::random(0,9)) = some_inf; + svd.compute(m, ComputeFullU | ComputeFullV); + + m = MatrixType::Zero(10,10); + m(internal::random(0,9), internal::random(0,9)) = nan; + svd.compute(m, ComputeFullU | ComputeFullV); + + // regression test for bug 791 + m.resize(3,3); + m << 0, 2*NumTraits::epsilon(), 0.5, + 0, -0.5, 0, + nan, 0, 0; + svd.compute(m, ComputeFullU | ComputeFullV); + + m.resize(4,4); + m << 1, 0, 0, 0, + 0, 3, 1, 2e-308, + 1, 0, 1, nan, + 0, nan, nan, 0; + svd.compute(m, ComputeFullU | ComputeFullV); +} + +// Regression test for bug 286: JacobiSVD loops indefinitely with some +// matrices containing denormal numbers. +template +void svd_underoverflow() +{ +#if defined __INTEL_COMPILER +// shut up warning #239: floating point underflow +#pragma warning push +#pragma warning disable 239 +#endif + Matrix2d M; + M << -7.90884e-313, -4.94e-324, + 0, 5.60844e-313; + SVD_DEFAULT(Matrix2d) svd; + svd.compute(M,ComputeFullU|ComputeFullV); + CALL_SUBTEST( svd_check_full(M,svd) ); + + // Check all 2x2 matrices made with the following coefficients: + VectorXd value_set(9); + value_set << 0, 1, -1, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -4.94e-223, 4.94e-223; + Array4i id(0,0,0,0); + int k = 0; + do + { + M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3)); + svd.compute(M,ComputeFullU|ComputeFullV); + CALL_SUBTEST( svd_check_full(M,svd) ); + + id(k)++; + if(id(k)>=value_set.size()) + { + while(k<3 && id(k)>=value_set.size()) id(++k)++; + id.head(k).setZero(); + k=0; + } + + } while((id +void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) ) +{ + MatrixType M; + VectorXd 
value_set(3); + value_set << 0, 1, -1; + Array4i id(0,0,0,0); + int k = 0; + do + { + M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3)); + + cb(M,false); + + id(k)++; + if(id(k)>=value_set.size()) + { + while(k<3 && id(k)>=value_set.size()) id(++k)++; + id.head(k).setZero(); + k=0; + } + + } while((id +void svd_preallocate() +{ + Vector3f v(3.f, 2.f, 1.f); + MatrixXf m = v.asDiagonal(); + + internal::set_is_malloc_allowed(false); + VERIFY_RAISES_ASSERT(VectorXf tmp(10);) + SVD_DEFAULT(MatrixXf) svd; + internal::set_is_malloc_allowed(true); + svd.compute(m); + VERIFY_IS_APPROX(svd.singularValues(), v); + + SVD_DEFAULT(MatrixXf) svd2(3,3); + internal::set_is_malloc_allowed(false); + svd2.compute(m); + internal::set_is_malloc_allowed(true); + VERIFY_IS_APPROX(svd2.singularValues(), v); + VERIFY_RAISES_ASSERT(svd2.matrixU()); + VERIFY_RAISES_ASSERT(svd2.matrixV()); + svd2.compute(m, ComputeFullU | ComputeFullV); + VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity()); + VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity()); + internal::set_is_malloc_allowed(false); + svd2.compute(m); + internal::set_is_malloc_allowed(true); + + SVD_DEFAULT(MatrixXf) svd3(3,3,ComputeFullU|ComputeFullV); + internal::set_is_malloc_allowed(false); + svd2.compute(m); + internal::set_is_malloc_allowed(true); + VERIFY_IS_APPROX(svd2.singularValues(), v); + VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity()); + VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity()); + internal::set_is_malloc_allowed(false); + svd2.compute(m, ComputeFullU|ComputeFullV); + internal::set_is_malloc_allowed(true); +} + +template +void svd_verify_assert(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix RhsType; + RhsType rhs(rows); + 
SvdType svd; + VERIFY_RAISES_ASSERT(svd.matrixU()) + VERIFY_RAISES_ASSERT(svd.singularValues()) + VERIFY_RAISES_ASSERT(svd.matrixV()) + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + MatrixType a = MatrixType::Zero(rows, cols); + a.setZero(); + svd.compute(a, 0); + VERIFY_RAISES_ASSERT(svd.matrixU()) + VERIFY_RAISES_ASSERT(svd.matrixV()) + svd.singularValues(); + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + + if (ColsAtCompileTime == Dynamic) + { + svd.compute(a, ComputeThinU); + svd.matrixU(); + VERIFY_RAISES_ASSERT(svd.matrixV()) + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + svd.compute(a, ComputeThinV); + svd.matrixV(); + VERIFY_RAISES_ASSERT(svd.matrixU()) + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + } + else + { + VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU)) + VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV)) + } +} + +#undef SVD_DEFAULT +#undef SVD_FOR_MIN_NORM diff --git a/thirdparty/eigen/test/svd_fill.h b/thirdparty/eigen/test/svd_fill.h new file mode 100644 index 000000000..3877c0c7e --- /dev/null +++ b/thirdparty/eigen/test/svd_fill.h @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +template +Array four_denorms(); + +template<> +Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); } +template<> +Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); } +template +Array four_denorms() { return four_denorms().cast(); } + +template +void svd_fill_random(MatrixType &m, int Option = 0) +{ + using std::pow; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + Index diagSize = (std::min)(m.rows(), m.cols()); + RealScalar s = std::numeric_limits::max_exponent10/4; + s = internal::random(1,s); + Matrix d = Matrix::Random(diagSize); + for(Index k=0; k(-s,s)); + + bool dup = internal::random(0,10) < 3; + bool unit_uv = internal::random(0,10) < (dup?7:3); // if we duplicate some diagonal entries, then increase the chance to preserve them using unitary U and V factors + + // duplicate some singular values + if(dup) + { + Index n = internal::random(0,d.size()-1); + for(Index i=0; i(0,d.size()-1)) = d(internal::random(0,d.size()-1)); + } + + Matrix U(m.rows(),diagSize); + Matrix VT(diagSize,m.cols()); + if(unit_uv) + { + // in very rare cases let's try with a pure diagonal matrix + if(internal::random(0,10) < 1) + { + U.setIdentity(); + VT.setIdentity(); + } + else + { + createRandomPIMatrixOfRank(diagSize,U.rows(), U.cols(), U); + createRandomPIMatrixOfRank(diagSize,VT.rows(), VT.cols(), VT); + } + } + else + { + U.setRandom(); + VT.setRandom(); + } + + Matrix samples(9); + samples << 0, four_denorms(), + -RealScalar(1)/NumTraits::highest(), RealScalar(1)/NumTraits::highest(), (std::numeric_limits::min)(), pow((std::numeric_limits::min)(),0.8); + + if(Option==Symmetric) + { + m = U * d.asDiagonal() * U.transpose(); + + // randomly nullify some rows/columns + { + Index count = internal::random(-diagSize,diagSize); + for(Index k=0; k(0,diagSize-1); + m.row(i).setZero(); + 
m.col(i).setZero(); + } + if(count<0) + // (partly) cancel some coeffs + if(!(dup && unit_uv)) + { + + Index n = internal::random(0,m.size()-1); + for(Index k=0; k(0,m.rows()-1); + Index j = internal::random(0,m.cols()-1); + m(j,i) = m(i,j) = samples(internal::random(0,samples.size()-1)); + if(NumTraits::IsComplex) + *(&numext::real_ref(m(j,i))+1) = *(&numext::real_ref(m(i,j))+1) = samples.real()(internal::random(0,samples.size()-1)); + } + } + } + } + else + { + m = U * d.asDiagonal() * VT; + // (partly) cancel some coeffs + if(!(dup && unit_uv)) + { + Index n = internal::random(0,m.size()-1); + for(Index k=0; k(0,m.rows()-1); + Index j = internal::random(0,m.cols()-1); + m(i,j) = samples(internal::random(0,samples.size()-1)); + if(NumTraits::IsComplex) + *(&numext::real_ref(m(i,j))+1) = samples.real()(internal::random(0,samples.size()-1)); + } + } + } +} + diff --git a/thirdparty/eigen/test/swap.cpp b/thirdparty/eigen/test/swap.cpp new file mode 100644 index 000000000..f76e3624d --- /dev/null +++ b/thirdparty/eigen/test/swap.cpp @@ -0,0 +1,94 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_STATIC_ASSERT +#include "main.h" + +template +struct other_matrix_type +{ + typedef int type; +}; + +template +struct other_matrix_type > +{ + typedef Matrix<_Scalar, _Rows, _Cols, _Options^RowMajor, _MaxRows, _MaxCols> type; +}; + +template void swap(const MatrixType& m) +{ + typedef typename other_matrix_type::type OtherMatrixType; + typedef typename MatrixType::Scalar Scalar; + + eigen_assert((!internal::is_same::value)); + typename MatrixType::Index rows = m.rows(); + typename MatrixType::Index cols = m.cols(); + + // construct 3 matrix guaranteed to be distinct + MatrixType m1 = MatrixType::Random(rows,cols); + MatrixType m2 = MatrixType::Random(rows,cols) + Scalar(100) * MatrixType::Identity(rows,cols); + OtherMatrixType m3 = OtherMatrixType::Random(rows,cols) + Scalar(200) * OtherMatrixType::Identity(rows,cols); + + MatrixType m1_copy = m1; + MatrixType m2_copy = m2; + OtherMatrixType m3_copy = m3; + + // test swapping 2 matrices of same type + Scalar *d1=m1.data(), *d2=m2.data(); + m1.swap(m2); + VERIFY_IS_APPROX(m1,m2_copy); + VERIFY_IS_APPROX(m2,m1_copy); + if(MatrixType::SizeAtCompileTime==Dynamic) + { + VERIFY(m1.data()==d2); + VERIFY(m2.data()==d1); + } + m1 = m1_copy; + m2 = m2_copy; + + // test swapping 2 matrices of different types + m1.swap(m3); + VERIFY_IS_APPROX(m1,m3_copy); + VERIFY_IS_APPROX(m3,m1_copy); + m1 = m1_copy; + m3 = m3_copy; + + // test swapping matrix with expression + m1.swap(m2.block(0,0,rows,cols)); + VERIFY_IS_APPROX(m1,m2_copy); + VERIFY_IS_APPROX(m2,m1_copy); + m1 = m1_copy; + m2 = m2_copy; + + // test swapping two expressions of different types + m1.transpose().swap(m3.transpose()); + VERIFY_IS_APPROX(m1,m3_copy); + VERIFY_IS_APPROX(m3,m1_copy); + m1 = m1_copy; + m3 = m3_copy; + + if(m1.rows()>1) + { + // test assertion on mismatching size -- matrix case + VERIFY_RAISES_ASSERT(m1.swap(m1.row(0))); + // test assertion on mismatching size -- xpr case + VERIFY_RAISES_ASSERT(m1.row(0).swap(m1)); + } +} + +void 
test_swap() +{ + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization + CALL_SUBTEST_2( swap(Matrix4d()) ); // fixed size, possible vectorization + CALL_SUBTEST_3( swap(MatrixXd(s,s)) ); // dyn size, no vectorization + CALL_SUBTEST_4( swap(MatrixXf(s,s)) ); // dyn size, possible vectorization + TEST_SET_BUT_UNUSED_VARIABLE(s) +} diff --git a/thirdparty/eigen/test/triangular.cpp b/thirdparty/eigen/test/triangular.cpp new file mode 100644 index 000000000..b96856486 --- /dev/null +++ b/thirdparty/eigen/test/triangular.cpp @@ -0,0 +1,247 @@ +// This file is triangularView of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + + + +template void triangular_square(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix VectorType; + + RealScalar largerEps = 10*test_precision(); + + typename MatrixType::Index rows = m.rows(); + typename MatrixType::Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + m4(rows, cols), + r1(rows, cols), + r2(rows, cols); + VectorType v2 = VectorType::Random(rows); + + MatrixType m1up = m1.template triangularView(); + MatrixType m2up = m2.template triangularView(); + + if (rows*cols>1) + { + VERIFY(m1up.isUpperTriangular()); + VERIFY(m2up.transpose().isLowerTriangular()); + VERIFY(!m2.isLowerTriangular()); + } + +// VERIFY_IS_APPROX(m1up.transpose() * m2, m1.upper().transpose().lower() * m2); + + // test overloaded operator+= + r1.setZero(); + r2.setZero(); + r1.template triangularView() += m1; + r2 += m1up; + VERIFY_IS_APPROX(r1,r2); 
+ + // test overloaded operator= + m1.setZero(); + m1.template triangularView() = m2.transpose() + m2; + m3 = m2.transpose() + m2; + VERIFY_IS_APPROX(m3.template triangularView().transpose().toDenseMatrix(), m1); + + // test overloaded operator= + m1.setZero(); + m1.template triangularView() = m2.transpose() + m2; + VERIFY_IS_APPROX(m3.template triangularView().toDenseMatrix(), m1); + + VERIFY_IS_APPROX(m3.template triangularView().conjugate().toDenseMatrix(), + m3.conjugate().template triangularView().toDenseMatrix()); + + m1 = MatrixType::Random(rows, cols); + for (int i=0; i(); + + Transpose trm4(m4); + // test back and forward subsitution with a vector as the rhs + m3 = m1.template triangularView(); + VERIFY(v2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView().solve(v2)), largerEps)); + m3 = m1.template triangularView(); + VERIFY(v2.isApprox(m3.transpose() * (m1.transpose().template triangularView().solve(v2)), largerEps)); + m3 = m1.template triangularView(); + VERIFY(v2.isApprox(m3 * (m1.template triangularView().solve(v2)), largerEps)); + m3 = m1.template triangularView(); + VERIFY(v2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView().solve(v2)), largerEps)); + + // test back and forward substitution with a matrix as the rhs + m3 = m1.template triangularView(); + VERIFY(m2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView().solve(m2)), largerEps)); + m3 = m1.template triangularView(); + VERIFY(m2.isApprox(m3.transpose() * (m1.transpose().template triangularView().solve(m2)), largerEps)); + m3 = m1.template triangularView(); + VERIFY(m2.isApprox(m3 * (m1.template triangularView().solve(m2)), largerEps)); + m3 = m1.template triangularView(); + VERIFY(m2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView().solve(m2)), largerEps)); + + // check M * inv(L) using in place API + m4 = m3; + m1.transpose().template triangularView().solveInPlace(trm4); + VERIFY_IS_APPROX(m4 * m1.template triangularView(), m3); 
+ + // check M * inv(U) using in place API + m3 = m1.template triangularView(); + m4 = m3; + m3.transpose().template triangularView().solveInPlace(trm4); + VERIFY_IS_APPROX(m4 * m1.template triangularView(), m3); + + // check solve with unit diagonal + m3 = m1.template triangularView(); + VERIFY(m2.isApprox(m3 * (m1.template triangularView().solve(m2)), largerEps)); + +// VERIFY(( m1.template triangularView() +// * m2.template triangularView()).isUpperTriangular()); + + // test swap + m1.setOnes(); + m2.setZero(); + m2.template triangularView().swap(m1); + m3.setZero(); + m3.template triangularView().setOnes(); + VERIFY_IS_APPROX(m2,m3); + + m1.setRandom(); + m3 = m1.template triangularView(); + Matrix m5(cols, internal::random(1,20)); m5.setRandom(); + Matrix m6(internal::random(1,20), rows); m6.setRandom(); + VERIFY_IS_APPROX(m1.template triangularView() * m5, m3*m5); + VERIFY_IS_APPROX(m6*m1.template triangularView(), m6*m3); + + m1up = m1.template triangularView(); + VERIFY_IS_APPROX(m1.template selfadjointView().template triangularView().toDenseMatrix(), m1up); + VERIFY_IS_APPROX(m1up.template selfadjointView().template triangularView().toDenseMatrix(), m1up); + VERIFY_IS_APPROX(m1.template selfadjointView().template triangularView().toDenseMatrix(), m1up.adjoint()); + VERIFY_IS_APPROX(m1up.template selfadjointView().template triangularView().toDenseMatrix(), m1up.adjoint()); + + VERIFY_IS_APPROX(m1.template selfadjointView().diagonal(), m1.diagonal()); + +} + + +template void triangular_rect(const MatrixType& m) +{ + typedef const typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; + + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + m4(rows, cols), + r1(rows, cols), + r2(rows, cols); + + 
MatrixType m1up = m1.template triangularView(); + MatrixType m2up = m2.template triangularView(); + + if (rows>1 && cols>1) + { + VERIFY(m1up.isUpperTriangular()); + VERIFY(m2up.transpose().isLowerTriangular()); + VERIFY(!m2.isLowerTriangular()); + } + + // test overloaded operator+= + r1.setZero(); + r2.setZero(); + r1.template triangularView() += m1; + r2 += m1up; + VERIFY_IS_APPROX(r1,r2); + + // test overloaded operator= + m1.setZero(); + m1.template triangularView() = 3 * m2; + m3 = 3 * m2; + VERIFY_IS_APPROX(m3.template triangularView().toDenseMatrix(), m1); + + + m1.setZero(); + m1.template triangularView() = 3 * m2; + VERIFY_IS_APPROX(m3.template triangularView().toDenseMatrix(), m1); + + m1.setZero(); + m1.template triangularView() = 3 * m2; + VERIFY_IS_APPROX(m3.template triangularView().toDenseMatrix(), m1); + + + m1.setZero(); + m1.template triangularView() = 3 * m2; + VERIFY_IS_APPROX(m3.template triangularView().toDenseMatrix(), m1); + m1.setRandom(); + m2 = m1.template triangularView(); + VERIFY(m2.isUpperTriangular()); + VERIFY(!m2.isLowerTriangular()); + m2 = m1.template triangularView(); + VERIFY(m2.isUpperTriangular()); + VERIFY(m2.diagonal().isMuchSmallerThan(RealScalar(1))); + m2 = m1.template triangularView(); + VERIFY(m2.isUpperTriangular()); + m2.diagonal().array() -= Scalar(1); + VERIFY(m2.diagonal().isMuchSmallerThan(RealScalar(1))); + m2 = m1.template triangularView(); + VERIFY(m2.isLowerTriangular()); + VERIFY(!m2.isUpperTriangular()); + m2 = m1.template triangularView(); + VERIFY(m2.isLowerTriangular()); + VERIFY(m2.diagonal().isMuchSmallerThan(RealScalar(1))); + m2 = m1.template triangularView(); + VERIFY(m2.isLowerTriangular()); + m2.diagonal().array() -= Scalar(1); + VERIFY(m2.diagonal().isMuchSmallerThan(RealScalar(1))); + // test swap + m1.setOnes(); + m2.setZero(); + m2.template triangularView().swap(m1); + m3.setZero(); + m3.template triangularView().setOnes(); + VERIFY_IS_APPROX(m2,m3); +} + +void bug_159() +{ + Matrix3d m = 
Matrix3d::Random().triangularView(); + EIGEN_UNUSED_VARIABLE(m) +} + +void test_triangular() +{ + int maxsize = (std::min)(EIGEN_TEST_MAX_SIZE,20); + for(int i = 0; i < g_repeat ; i++) + { + int r = internal::random(2,maxsize); TEST_SET_BUT_UNUSED_VARIABLE(r) + int c = internal::random(2,maxsize); TEST_SET_BUT_UNUSED_VARIABLE(c) + + CALL_SUBTEST_1( triangular_square(Matrix()) ); + CALL_SUBTEST_2( triangular_square(Matrix()) ); + CALL_SUBTEST_3( triangular_square(Matrix3d()) ); + CALL_SUBTEST_4( triangular_square(Matrix,8, 8>()) ); + CALL_SUBTEST_5( triangular_square(MatrixXcd(r,r)) ); + CALL_SUBTEST_6( triangular_square(Matrix(r, r)) ); + + CALL_SUBTEST_7( triangular_rect(Matrix()) ); + CALL_SUBTEST_8( triangular_rect(Matrix()) ); + CALL_SUBTEST_9( triangular_rect(MatrixXcf(r, c)) ); + CALL_SUBTEST_5( triangular_rect(MatrixXcd(r, c)) ); + CALL_SUBTEST_6( triangular_rect(Matrix(r, c)) ); + } + + CALL_SUBTEST_1( bug_159() ); +} diff --git a/thirdparty/eigen/test/umeyama.cpp b/thirdparty/eigen/test/umeyama.cpp new file mode 100644 index 000000000..2e8092434 --- /dev/null +++ b/thirdparty/eigen/test/umeyama.cpp @@ -0,0 +1,183 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include +#include + +#include // required for MatrixBase::determinant +#include // required for SVD + +using namespace Eigen; + +// Constructs a random matrix from the unitary group U(size). 
+template +Eigen::Matrix randMatrixUnitary(int size) +{ + typedef T Scalar; + typedef Eigen::Matrix MatrixType; + + MatrixType Q; + + int max_tries = 40; + double is_unitary = false; + + while (!is_unitary && max_tries > 0) + { + // initialize random matrix + Q = MatrixType::Random(size, size); + + // orthogonalize columns using the Gram-Schmidt algorithm + for (int col = 0; col < size; ++col) + { + typename MatrixType::ColXpr colVec = Q.col(col); + for (int prevCol = 0; prevCol < col; ++prevCol) + { + typename MatrixType::ColXpr prevColVec = Q.col(prevCol); + colVec -= colVec.dot(prevColVec)*prevColVec; + } + Q.col(col) = colVec.normalized(); + } + + // this additional orthogonalization is not necessary in theory but should enhance + // the numerical orthogonality of the matrix + for (int row = 0; row < size; ++row) + { + typename MatrixType::RowXpr rowVec = Q.row(row); + for (int prevRow = 0; prevRow < row; ++prevRow) + { + typename MatrixType::RowXpr prevRowVec = Q.row(prevRow); + rowVec -= rowVec.dot(prevRowVec)*prevRowVec; + } + Q.row(row) = rowVec.normalized(); + } + + // final check + is_unitary = Q.isUnitary(); + --max_tries; + } + + if (max_tries == 0) + eigen_assert(false && "randMatrixUnitary: Could not construct unitary matrix!"); + + return Q; +} + +// Constructs a random matrix from the special unitary group SU(size). +template +Eigen::Matrix randMatrixSpecialUnitary(int size) +{ + typedef T Scalar; + + typedef Eigen::Matrix MatrixType; + + // initialize unitary matrix + MatrixType Q = randMatrixUnitary(size); + + // tweak the first column to make the determinant be 1 + Q.col(0) *= numext::conj(Q.determinant()); + + return Q; +} + +template +void run_test(int dim, int num_elements) +{ + using std::abs; + typedef typename internal::traits::Scalar Scalar; + typedef Matrix MatrixX; + typedef Matrix VectorX; + + // MUST be positive because in any other case det(cR_t) may become negative for + // odd dimensions! 
+ const Scalar c = abs(internal::random()); + + MatrixX R = randMatrixSpecialUnitary(dim); + VectorX t = Scalar(50)*VectorX::Random(dim,1); + + MatrixX cR_t = MatrixX::Identity(dim+1,dim+1); + cR_t.block(0,0,dim,dim) = c*R; + cR_t.block(0,dim,dim,1) = t; + + MatrixX src = MatrixX::Random(dim+1, num_elements); + src.row(dim) = Matrix::Constant(num_elements, Scalar(1)); + + MatrixX dst = cR_t*src; + + MatrixX cR_t_umeyama = umeyama(src.block(0,0,dim,num_elements), dst.block(0,0,dim,num_elements)); + + const Scalar error = ( cR_t_umeyama*src - dst ).norm() / dst.norm(); + VERIFY(error < Scalar(40)*std::numeric_limits::epsilon()); +} + +template +void run_fixed_size_test(int num_elements) +{ + using std::abs; + typedef Matrix MatrixX; + typedef Matrix HomMatrix; + typedef Matrix FixedMatrix; + typedef Matrix FixedVector; + + const int dim = Dimension; + + // MUST be positive because in any other case det(cR_t) may become negative for + // odd dimensions! + // Also if c is to small compared to t.norm(), problem is ill-posed (cf. Bug 744) + const Scalar c = internal::random(0.5, 2.0); + + FixedMatrix R = randMatrixSpecialUnitary(dim); + FixedVector t = Scalar(32)*FixedVector::Random(dim,1); + + HomMatrix cR_t = HomMatrix::Identity(dim+1,dim+1); + cR_t.block(0,0,dim,dim) = c*R; + cR_t.block(0,dim,dim,1) = t; + + MatrixX src = MatrixX::Random(dim+1, num_elements); + src.row(dim) = Matrix::Constant(num_elements, Scalar(1)); + + MatrixX dst = cR_t*src; + + Block src_block(src,0,0,dim,num_elements); + Block dst_block(dst,0,0,dim,num_elements); + + HomMatrix cR_t_umeyama = umeyama(src_block, dst_block); + + const Scalar error = ( cR_t_umeyama*src - dst ).squaredNorm(); + + VERIFY(error < Scalar(16)*std::numeric_limits::epsilon()); +} + +void test_umeyama() +{ + for (int i=0; i(40,500); + + // works also for dimensions bigger than 3... 
+ for (int dim=2; dim<8; ++dim) + { + CALL_SUBTEST_1(run_test(dim, num_elements)); + CALL_SUBTEST_2(run_test(dim, num_elements)); + } + + CALL_SUBTEST_3((run_fixed_size_test(num_elements))); + CALL_SUBTEST_4((run_fixed_size_test(num_elements))); + CALL_SUBTEST_5((run_fixed_size_test(num_elements))); + + CALL_SUBTEST_6((run_fixed_size_test(num_elements))); + CALL_SUBTEST_7((run_fixed_size_test(num_elements))); + CALL_SUBTEST_8((run_fixed_size_test(num_elements))); + } + + // Those two calls don't compile and result in meaningful error messages! + // umeyama(MatrixXcf(),MatrixXcf()); + // umeyama(MatrixXcd(),MatrixXcd()); +} diff --git a/thirdparty/eigen/test/umfpack_support.cpp b/thirdparty/eigen/test/umfpack_support.cpp new file mode 100644 index 000000000..37ab11f0b --- /dev/null +++ b/thirdparty/eigen/test/umfpack_support.cpp @@ -0,0 +1,32 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#include "sparse_solver.h" + +#include + +template void test_umfpack_support_T() +{ + UmfPackLU > umfpack_colmajor; + UmfPackLU > umfpack_rowmajor; + + check_sparse_square_solving(umfpack_colmajor); + check_sparse_square_solving(umfpack_rowmajor); + + check_sparse_square_determinant(umfpack_colmajor); + check_sparse_square_determinant(umfpack_rowmajor); +} + +void test_umfpack_support() +{ + CALL_SUBTEST_1(test_umfpack_support_T()); + CALL_SUBTEST_2(test_umfpack_support_T >()); +} + diff --git a/thirdparty/eigen/test/unalignedassert.cpp b/thirdparty/eigen/test/unalignedassert.cpp new file mode 100644 index 000000000..731a08977 --- /dev/null +++ b/thirdparty/eigen/test/unalignedassert.cpp @@ -0,0 +1,180 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_TEST_PART_1) + // default +#elif defined(EIGEN_TEST_PART_2) + #define EIGEN_MAX_STATIC_ALIGN_BYTES 16 + #define EIGEN_MAX_ALIGN_BYTES 16 +#elif defined(EIGEN_TEST_PART_3) + #define EIGEN_MAX_STATIC_ALIGN_BYTES 32 + #define EIGEN_MAX_ALIGN_BYTES 32 +#elif defined(EIGEN_TEST_PART_4) + #define EIGEN_MAX_STATIC_ALIGN_BYTES 64 + #define EIGEN_MAX_ALIGN_BYTES 64 +#endif + +#include "main.h" + +typedef Matrix Vector6f; +typedef Matrix Vector8f; +typedef Matrix Vector12f; + +typedef Matrix Vector5d; +typedef Matrix Vector6d; +typedef Matrix Vector7d; +typedef Matrix Vector8d; +typedef Matrix Vector9d; +typedef Matrix Vector10d; +typedef Matrix Vector12d; + +struct TestNew1 +{ + MatrixXd m; // good: m will allocate its own array, taking care of alignment. 
+ TestNew1() : m(20,20) {} +}; + +struct TestNew2 +{ + Matrix3d m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be 16-byte aligned, + // 8-byte alignment is good enough here, which we'll get automatically +}; + +struct TestNew3 +{ + Vector2f m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be 16-byte aligned +}; + +struct TestNew4 +{ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + Vector2d m; + float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects +}; + +struct TestNew5 +{ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work + Matrix4f m; +}; + +struct TestNew6 +{ + Matrix m; // good: no alignment requested + float f; +}; + +template struct Depends +{ + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(Align) + Vector2d m; + float f; +}; + +template +void check_unalignedassert_good() +{ + T *x, *y; + x = new T; + delete x; + y = new T[2]; + delete[] y; +} + +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +template +void construct_at_boundary(int boundary) +{ + char buf[sizeof(T)+256]; + size_t _buf = reinterpret_cast(buf); + _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned + _buf += boundary; // make exact boundary-aligned + T *x = ::new(reinterpret_cast(_buf)) T; + x[0].setZero(); // just in order to silence warnings + x->~T(); +} +#endif + +void unalignedassert() +{ +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 + construct_at_boundary(4); + construct_at_boundary(4); + construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(16); + construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + + construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(16); + 
construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(16); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + + construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(16); +#endif + + check_unalignedassert_good(); + check_unalignedassert_good(); + check_unalignedassert_good(); + + check_unalignedassert_good(); + check_unalignedassert_good(); + check_unalignedassert_good(); + check_unalignedassert_good >(); + +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(EIGEN_MAX_ALIGN_BYTES>=16) + { + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + // Complexes are disabled because the compiler might aggressively vectorize + // the initialization of complex coeffs to 0 before we can check for alignedness + //VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + } + for(int b=8; b(b)); + if(b<64) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + //if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + } +#endif +} + +void test_unalignedassert() +{ + CALL_SUBTEST(unalignedassert()); +} diff --git a/thirdparty/eigen/test/unalignedcount.cpp b/thirdparty/eigen/test/unalignedcount.cpp new file mode 100644 index 
000000000..d6ffeafdf --- /dev/null +++ b/thirdparty/eigen/test/unalignedcount.cpp @@ -0,0 +1,53 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +static int nb_load; +static int nb_loadu; +static int nb_store; +static int nb_storeu; + +#define EIGEN_DEBUG_ALIGNED_LOAD { nb_load++; } +#define EIGEN_DEBUG_UNALIGNED_LOAD { nb_loadu++; } +#define EIGEN_DEBUG_ALIGNED_STORE { nb_store++; } +#define EIGEN_DEBUG_UNALIGNED_STORE { nb_storeu++; } + +#define VERIFY_ALIGNED_UNALIGNED_COUNT(XPR,AL,UL,AS,US) {\ + nb_load = nb_loadu = nb_store = nb_storeu = 0; \ + XPR; \ + if(!(nb_load==AL && nb_loadu==UL && nb_store==AS && nb_storeu==US)) \ + std::cerr << " >> " << nb_load << ", " << nb_loadu << ", " << nb_store << ", " << nb_storeu << "\n"; \ + VERIFY( (#XPR) && nb_load==AL && nb_loadu==UL && nb_store==AS && nb_storeu==US ); \ + } + + +#include "main.h" + +void test_unalignedcount() +{ + #if defined(EIGEN_VECTORIZE_AVX) + VectorXf a(40), b(40); + VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 10, 0, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 5, 5, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) -= b.segment(0,40), 5, 5, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) *= 3.5, 5, 0, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) /= 3.5, 5, 0, 5, 0); + #elif defined(EIGEN_VECTORIZE_SSE) + VectorXf a(40), b(40); + VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 20, 0, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 10, 10, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) -= b.segment(0,40), 10, 10, 10, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) *= 3.5, 10, 0, 10, 0); + 
VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) /= 3.5, 10, 0, 10, 0); + #else + // The following line is to eliminate "variable not used" warnings + nb_load = nb_loadu = nb_store = nb_storeu = 0; + int a(0), b(0); + VERIFY(a==b); + #endif +} diff --git a/thirdparty/eigen/test/upperbidiagonalization.cpp b/thirdparty/eigen/test/upperbidiagonalization.cpp new file mode 100644 index 000000000..847b34b55 --- /dev/null +++ b/thirdparty/eigen/test/upperbidiagonalization.cpp @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include + +template void upperbidiag(const MatrixType& m) +{ + const typename MatrixType::Index rows = m.rows(); + const typename MatrixType::Index cols = m.cols(); + + typedef Matrix RealMatrixType; + typedef Matrix TransposeMatrixType; + + MatrixType a = MatrixType::Random(rows,cols); + internal::UpperBidiagonalization ubd(a); + RealMatrixType b(rows, cols); + b.setZero(); + b.block(0,0,cols,cols) = ubd.bidiagonal(); + MatrixType c = ubd.householderU() * b * ubd.householderV().adjoint(); + VERIFY_IS_APPROX(a,c); + TransposeMatrixType d = ubd.householderV() * b.adjoint() * ubd.householderU().adjoint(); + VERIFY_IS_APPROX(a.adjoint(),d); +} + +void test_upperbidiagonalization() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( upperbidiag(MatrixXf(3,3)) ); + CALL_SUBTEST_2( upperbidiag(MatrixXd(17,12)) ); + CALL_SUBTEST_3( upperbidiag(MatrixXcf(20,20)) ); + CALL_SUBTEST_4( upperbidiag(Matrix,Dynamic,Dynamic,RowMajor>(16,15)) ); + CALL_SUBTEST_5( upperbidiag(Matrix()) ); + CALL_SUBTEST_6( upperbidiag(Matrix()) ); + CALL_SUBTEST_7( upperbidiag(Matrix()) ); + } +} diff --git a/thirdparty/eigen/test/vectorization_logic.cpp 
b/thirdparty/eigen/test/vectorization_logic.cpp new file mode 100644 index 000000000..83c1439ad --- /dev/null +++ b/thirdparty/eigen/test/vectorization_logic.cpp @@ -0,0 +1,419 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef EIGEN_TEST_PART_1 +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +#ifdef EIGEN_TEST_PART_2 +#define EIGEN_UNALIGNED_VECTORIZE 0 +#endif + +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif +#define EIGEN_DEBUG_ASSIGN +#include "main.h" +#include + +using internal::demangle_flags; +using internal::demangle_traversal; +using internal::demangle_unrolling; + +template +bool test_assign(const Dst&, const Src&, int traversal, int unrolling) +{ + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + bool res = traits::Traversal==traversal; + if(unrolling==InnerUnrolling+CompleteUnrolling) + res = res && (int(traits::Unrolling)==InnerUnrolling || int(traits::Unrolling)==CompleteUnrolling); + else + res = res && int(traits::Unrolling)==unrolling; + if(!res) + { + std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + traits::debug(); + std::cerr << " Expected Traversal == " << demangle_traversal(traversal) + << " got " << demangle_traversal(traits::Traversal) << "\n"; + std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling) + << " got " << demangle_unrolling(traits::Unrolling) << "\n"; + } + return res; +} + +template +bool 
test_assign(int traversal, int unrolling) +{ + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; + if(!res) + { + std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + traits::debug(); + std::cerr << " Expected Traversal == " << demangle_traversal(traversal) + << " got " << demangle_traversal(traits::Traversal) << "\n"; + std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling) + << " got " << demangle_unrolling(traits::Unrolling) << "\n"; + } + return res; +} + +template +bool test_redux(const Xpr&, int traversal, int unrolling) +{ + typedef typename Xpr::Scalar Scalar; + typedef internal::redux_traits,internal::redux_evaluator > traits; + + bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; + if(!res) + { + std::cerr << demangle_flags(Xpr::Flags) << std::endl; + std::cerr << demangle_flags(internal::evaluator::Flags) << std::endl; + traits::debug(); + + std::cerr << " Expected Traversal == " << demangle_traversal(traversal) + << " got " << demangle_traversal(traits::Traversal) << "\n"; + std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling) + << " got " << demangle_unrolling(traits::Unrolling) << "\n"; + } + return res; +} + +template::Vectorizable> +struct vectorization_logic +{ + typedef internal::packet_traits PacketTraits; + + typedef typename internal::packet_traits::type PacketType; + typedef typename internal::unpacket_traits::half HalfPacketType; + enum { + PacketSize = internal::unpacket_traits::size, + HalfPacketSize = internal::unpacket_traits::size + }; + static void run() + { + + typedef Matrix Vector1; + typedef Matrix VectorX; + typedef 
Matrix MatrixXX; + typedef Matrix Matrix11; + typedef Matrix Matrix22; + typedef Matrix Matrix44; + typedef Matrix Matrix44u; + typedef Matrix Matrix44c; + typedef Matrix Matrix44r; + + typedef Matrix Matrix1; + + typedef Matrix Matrix1u; + + // this type is made such that it can only be vectorized when viewed as a linear 1D vector + typedef Matrix Matrix3; + + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT + VERIFY(test_assign(Vector1(),Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1()+Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template cast(), + InnerVectorizedTraversal,CompleteUnrolling)); + + + VERIFY(test_assign(Vector1(),Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1()+Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix44(),Matrix44()+Matrix44(), + InnerVectorizedTraversal,InnerUnrolling)); + + VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); + + VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(), + (Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal, + CompleteUnrolling)); + + VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? 
InnerVectorizedTraversal : LinearVectorizedTraversal) + : LinearTraversal, CompleteUnrolling)); + + VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix44r().row(2),Matrix44r().row(1)+Matrix44r().row(1), + InnerVectorizedTraversal,CompleteUnrolling)); + + if(PacketSize>1) + { + typedef Matrix Matrix33c; + typedef Matrix Vector3; + VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), + LinearTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector3(),Vector3()+Vector3(), + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal), CompleteUnrolling)); + VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? SliceVectorizedTraversal : LinearTraversal), + ((!EIGEN_UNALIGNED_VECTORIZE) && HalfPacketSize==1) ? NoUnrolling : CompleteUnrolling)); + + VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix(),Matrix()+Matrix(), + HalfPacketSize==1 ? InnerVectorizedTraversal : + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : + LinearTraversal, + NoUnrolling)); + + VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling)); + + + VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), + (EIGEN_UNALIGNED_VECTORIZE) ? 
InnerVectorizedTraversal : DefaultTraversal, CompleteUnrolling|InnerUnrolling)); + + VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()), + InnerVectorizedTraversal,InnerUnrolling+CompleteUnrolling)); + } + + VERIFY(test_redux(Vector1(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix3(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix44(), + LinearVectorizedTraversal,NoUnrolling)); + + VERIFY(test_redux(Matrix44().template block<(Matrix1::Flags&RowMajorBit)?4:PacketSize,(Matrix1::Flags&RowMajorBit)?PacketSize:4>(1,2), + DefaultTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix44c().template block<2*PacketSize,1>(1,2), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix44r().template block<1,2*PacketSize>(2,1), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY((test_assign< + Map >, + Matrix22 + >(InnerVectorizedTraversal,CompleteUnrolling))); + + VERIFY((test_assign< + Map, AlignedMax, InnerStride<3*PacketSize> >, + Matrix + >(DefaultTraversal,PacketSize>=8?InnerUnrolling:CompleteUnrolling))); + + VERIFY((test_assign(Matrix11(), Matrix()*Matrix(), + InnerVectorizedTraversal, CompleteUnrolling))); + #endif + + VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3), + SliceVectorizedTraversal,NoUnrolling)); + + VERIFY(test_redux(VectorX(10), + LinearVectorizedTraversal,NoUnrolling)); + } +}; + +template struct vectorization_logic +{ + static void run() {} +}; + +template::type>::half, + typename internal::packet_traits::type>::value > +struct vectorization_logic_half +{ + typedef internal::packet_traits PacketTraits; + typedef typename internal::unpacket_traits::type>::half PacketType; + enum { + PacketSize = internal::unpacket_traits::size + }; + static 
void run() + { + + typedef Matrix Vector1; + typedef Matrix Matrix11; + typedef Matrix Matrix57; + typedef Matrix Matrix35; + typedef Matrix Matrix57u; +// typedef Matrix Matrix44; +// typedef Matrix Matrix44u; +// typedef Matrix Matrix44c; +// typedef Matrix Matrix44r; + + typedef Matrix Matrix1; + + typedef Matrix Matrix1u; + + // this type is made such that it can only be vectorized when viewed as a linear 1D vector + typedef Matrix Matrix3; + + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT + VERIFY(test_assign(Vector1(),Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1()+Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template segment(0).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Scalar(2.1)*Vector1()-Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),(Scalar(2.1)*Vector1().template segment(0)-Vector1().template segment(0)).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template cast(), + InnerVectorizedTraversal,CompleteUnrolling)); + + + VERIFY(test_assign(Vector1(),Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1()+Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix57(),Matrix57()+Matrix57(), + InnerVectorizedTraversal,InnerUnrolling)); + + VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? 
InnerUnrolling : NoUnrolling)); + + VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); + + if(PacketSize>1) + { + typedef Matrix Matrix33c; + VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), + LinearTraversal,CompleteUnrolling)); + VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()), + PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix(),Matrix()+Matrix(), + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal, + NoUnrolling)); + + VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), + EIGEN_UNALIGNED_VECTORIZE ? 
InnerVectorizedTraversal : DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); + + VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()), + InnerVectorizedTraversal,InnerUnrolling+CompleteUnrolling)); + } + + VERIFY(test_redux(Vector1(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix3(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix35(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix57().template block(1,0), + DefaultTraversal,CompleteUnrolling)); + + VERIFY((test_assign< + Map, AlignedMax, InnerStride<3*PacketSize> >, + Matrix + >(DefaultTraversal,CompleteUnrolling))); + + VERIFY((test_assign(Matrix57(), Matrix()*Matrix(), + InnerVectorizedTraversal, InnerUnrolling|CompleteUnrolling))); + #endif + } +}; + +template struct vectorization_logic_half +{ + static void run() {} +}; + +void test_vectorization_logic() +{ + +#ifdef EIGEN_VECTORIZE + + CALL_SUBTEST( vectorization_logic::run() ); + CALL_SUBTEST( vectorization_logic::run() ); + CALL_SUBTEST( vectorization_logic::run() ); + CALL_SUBTEST( vectorization_logic >::run() ); + CALL_SUBTEST( vectorization_logic >::run() ); + + CALL_SUBTEST( vectorization_logic_half::run() ); + CALL_SUBTEST( vectorization_logic_half::run() ); + CALL_SUBTEST( vectorization_logic_half::run() ); + CALL_SUBTEST( vectorization_logic_half >::run() ); + CALL_SUBTEST( vectorization_logic_half >::run() ); + + if(internal::packet_traits::Vectorizable) + { + VERIFY(test_assign(Matrix(),Matrix()+Matrix(), + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix(), + EIGEN_UNALIGNED_VECTORIZE ? 
LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); + } + + if(internal::packet_traits::Vectorizable) + { + VERIFY(test_assign(Matrix(),Matrix()+Matrix(), + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix(), + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); + } +#endif // EIGEN_VECTORIZE + +} diff --git a/thirdparty/eigen/test/vectorwiseop.cpp b/thirdparty/eigen/test/vectorwiseop.cpp new file mode 100644 index 000000000..739eacaf3 --- /dev/null +++ b/thirdparty/eigen/test/vectorwiseop.cpp @@ -0,0 +1,252 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define TEST_ENABLE_TEMPORARY_TRACKING +#define EIGEN_NO_STATIC_ASSERT + +#include "main.h" + +template void vectorwiseop_array(const ArrayType& m) +{ + typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + typedef Array ColVectorType; + typedef Array RowVectorType; + + Index rows = m.rows(); + Index cols = m.cols(); + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + ArrayType m1 = ArrayType::Random(rows, cols), + m2(rows, cols), + m3(rows, cols); + + ColVectorType colvec = ColVectorType::Random(rows); + RowVectorType rowvec = RowVectorType::Random(cols); + + // test addition + + m2 = m1; + m2.colwise() += colvec; + VERIFY_IS_APPROX(m2, m1.colwise() + colvec); + VERIFY_IS_APPROX(m2.col(c), m1.col(c) + colvec); + + VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + + m2 = m1; + m2.rowwise() += rowvec; + VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); + VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); + + VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + + // test substraction + + m2 = m1; + m2.colwise() -= colvec; + VERIFY_IS_APPROX(m2, m1.colwise() - colvec); + VERIFY_IS_APPROX(m2.col(c), m1.col(c) - colvec); + + VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + + m2 = m1; + m2.rowwise() -= rowvec; + VERIFY_IS_APPROX(m2, m1.rowwise() - rowvec); + VERIFY_IS_APPROX(m2.row(r), m1.row(r) - rowvec); + + VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + + // test multiplication + + m2 = m1; + m2.colwise() *= colvec; + VERIFY_IS_APPROX(m2, m1.colwise() * colvec); + VERIFY_IS_APPROX(m2.col(c), m1.col(c) * colvec); + + VERIFY_RAISES_ASSERT(m2.colwise() *= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() * colvec.transpose()); + + m2 = 
m1; + m2.rowwise() *= rowvec; + VERIFY_IS_APPROX(m2, m1.rowwise() * rowvec); + VERIFY_IS_APPROX(m2.row(r), m1.row(r) * rowvec); + + VERIFY_RAISES_ASSERT(m2.rowwise() *= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() * rowvec.transpose()); + + // test quotient + + m2 = m1; + m2.colwise() /= colvec; + VERIFY_IS_APPROX(m2, m1.colwise() / colvec); + VERIFY_IS_APPROX(m2.col(c), m1.col(c) / colvec); + + VERIFY_RAISES_ASSERT(m2.colwise() /= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() / colvec.transpose()); + + m2 = m1; + m2.rowwise() /= rowvec; + VERIFY_IS_APPROX(m2, m1.rowwise() / rowvec); + VERIFY_IS_APPROX(m2.row(r), m1.row(r) / rowvec); + + VERIFY_RAISES_ASSERT(m2.rowwise() /= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() / rowvec.transpose()); + + m2 = m1; + // yes, there might be an aliasing issue there but ".rowwise() /=" + // is supposed to evaluate " m2.colwise().sum()" into a temporary to avoid + // evaluating the reduction multiple times + if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic) + { + m2.rowwise() /= m2.colwise().sum(); + VERIFY_IS_APPROX(m2, m1.rowwise() / m1.colwise().sum()); + } + + // all/any + Array mb(rows,cols); + mb = (m1.real()<=0.7).colwise().all(); + VERIFY( (mb.col(c) == (m1.real().col(c)<=0.7).all()).all() ); + mb = (m1.real()<=0.7).rowwise().all(); + VERIFY( (mb.row(r) == (m1.real().row(r)<=0.7).all()).all() ); + + mb = (m1.real()>=0.7).colwise().any(); + VERIFY( (mb.col(c) == (m1.real().col(c)>=0.7).any()).all() ); + mb = (m1.real()>=0.7).rowwise().any(); + VERIFY( (mb.row(r) == (m1.real().row(r)>=0.7).any()).all() ); +} + +template void vectorwiseop_matrix(const MatrixType& m) +{ + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef Matrix ColVectorType; + typedef Matrix RowVectorType; + typedef Matrix RealColVectorType; + typedef Matrix RealRowVectorType; + + Index rows = m.rows(); + 
Index cols = m.cols(); + Index r = internal::random(0, rows-1), + c = internal::random(0, cols-1); + + MatrixType m1 = MatrixType::Random(rows, cols), + m2(rows, cols), + m3(rows, cols); + + ColVectorType colvec = ColVectorType::Random(rows); + RowVectorType rowvec = RowVectorType::Random(cols); + RealColVectorType rcres; + RealRowVectorType rrres; + + // test addition + + m2 = m1; + m2.colwise() += colvec; + VERIFY_IS_APPROX(m2, m1.colwise() + colvec); + VERIFY_IS_APPROX(m2.col(c), m1.col(c) + colvec); + + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + } + + m2 = m1; + m2.rowwise() += rowvec; + VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); + VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); + + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + } + + // test substraction + + m2 = m1; + m2.colwise() -= colvec; + VERIFY_IS_APPROX(m2, m1.colwise() - colvec); + VERIFY_IS_APPROX(m2.col(c), m1.col(c) - colvec); + + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + } + + m2 = m1; + m2.rowwise() -= rowvec; + VERIFY_IS_APPROX(m2, m1.rowwise() - rowvec); + VERIFY_IS_APPROX(m2.row(r), m1.row(r) - rowvec); + + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + } + + // test norm + rrres = m1.colwise().norm(); + VERIFY_IS_APPROX(rrres(c), m1.col(c).norm()); + rcres = m1.rowwise().norm(); + VERIFY_IS_APPROX(rcres(r), m1.row(r).norm()); + + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum(), m1.colwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().sum(), m1.rowwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm()); + 
VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm()); + + // regression for bug 1158 + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum().x(), m1.col(0).cwiseAbs().sum()); + + // test normalized + m2 = m1.colwise().normalized(); + VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized()); + m2 = m1.rowwise().normalized(); + VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized()); + + // test normalize + m2 = m1; + m2.colwise().normalize(); + VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized()); + m2 = m1; + m2.rowwise().normalize(); + VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized()); + + // test with partial reduction of products + Matrix m1m1 = m1 * m1.transpose(); + VERIFY_IS_APPROX( (m1 * m1.transpose()).colwise().sum(), m1m1.colwise().sum()); + Matrix tmp(rows); + VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), (MatrixType::RowsAtCompileTime==Dynamic ? 1 : 0)); + + m2 = m1.rowwise() - (m1.colwise().sum()/RealScalar(m1.rows())).eval(); + m1 = m1.rowwise() - (m1.colwise().sum()/RealScalar(m1.rows())); + VERIFY_IS_APPROX( m1, m2 ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/RealScalar(m1.rows())), (MatrixType::RowsAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime!=1 ? 
1 : 0) ); +} + +void test_vectorwiseop() +{ + CALL_SUBTEST_1( vectorwiseop_array(Array22cd()) ); + CALL_SUBTEST_2( vectorwiseop_array(Array()) ); + CALL_SUBTEST_3( vectorwiseop_array(ArrayXXf(3, 4)) ); + CALL_SUBTEST_4( vectorwiseop_matrix(Matrix4cf()) ); + CALL_SUBTEST_5( vectorwiseop_matrix(Matrix()) ); + CALL_SUBTEST_6( vectorwiseop_matrix(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(VectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(RowVectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); +} diff --git a/thirdparty/eigen/test/visitor.cpp b/thirdparty/eigen/test/visitor.cpp new file mode 100644 index 000000000..844170ec6 --- /dev/null +++ b/thirdparty/eigen/test/visitor.cpp @@ -0,0 +1,135 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +template void matrixVisitor(const MatrixType& p) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + + Index rows = p.rows(); + Index cols = p.cols(); + + // construct a random matrix where all coefficients are different + MatrixType m; + m = MatrixType::Random(rows, cols); + for(Index i = 0; i < m.size(); i++) + for(Index i2 = 0; i2 < i; i2++) + while(m(i) == m(i2)) // yes, == + m(i) = internal::random(); + + Scalar minc = Scalar(1000), maxc = Scalar(-1000); + Index minrow=0,mincol=0,maxrow=0,maxcol=0; + for(Index j = 0; j < cols; j++) + for(Index i = 0; i < rows; i++) + { + if(m(i,j) < minc) + { + minc = m(i,j); + minrow = i; + mincol = j; + } + if(m(i,j) > maxc) + { + maxc = m(i,j); + maxrow = i; + maxcol = j; + } + } + Index eigen_minrow, eigen_mincol, eigen_maxrow, eigen_maxcol; + Scalar eigen_minc, eigen_maxc; + eigen_minc = m.minCoeff(&eigen_minrow,&eigen_mincol); + eigen_maxc = m.maxCoeff(&eigen_maxrow,&eigen_maxcol); + VERIFY(minrow == eigen_minrow); + VERIFY(maxrow == eigen_maxrow); + VERIFY(mincol == eigen_mincol); + VERIFY(maxcol == eigen_maxcol); + VERIFY_IS_APPROX(minc, eigen_minc); + VERIFY_IS_APPROX(maxc, eigen_maxc); + VERIFY_IS_APPROX(minc, m.minCoeff()); + VERIFY_IS_APPROX(maxc, m.maxCoeff()); + + eigen_maxc = (m.adjoint()*m).maxCoeff(&eigen_maxrow,&eigen_maxcol); + eigen_maxc = (m.adjoint()*m).eval().maxCoeff(&maxrow,&maxcol); + VERIFY(maxrow == eigen_maxrow); + VERIFY(maxcol == eigen_maxcol); +} + +template void vectorVisitor(const VectorType& w) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::Index Index; + + Index size = w.size(); + + // construct a random vector where all coefficients are different + VectorType v; + v = VectorType::Random(size); + for(Index i = 0; i < size; i++) + for(Index i2 = 0; i2 < i; i2++) + while(v(i) == v(i2)) // yes, == + v(i) = internal::random(); + + Scalar minc = v(0), maxc = v(0); + Index minidx=0, maxidx=0; + 
for(Index i = 0; i < size; i++) + { + if(v(i) < minc) + { + minc = v(i); + minidx = i; + } + if(v(i) > maxc) + { + maxc = v(i); + maxidx = i; + } + } + Index eigen_minidx, eigen_maxidx; + Scalar eigen_minc, eigen_maxc; + eigen_minc = v.minCoeff(&eigen_minidx); + eigen_maxc = v.maxCoeff(&eigen_maxidx); + VERIFY(minidx == eigen_minidx); + VERIFY(maxidx == eigen_maxidx); + VERIFY_IS_APPROX(minc, eigen_minc); + VERIFY_IS_APPROX(maxc, eigen_maxc); + VERIFY_IS_APPROX(minc, v.minCoeff()); + VERIFY_IS_APPROX(maxc, v.maxCoeff()); + + Index idx0 = internal::random(0,size-1); + Index idx1 = eigen_minidx; + Index idx2 = eigen_maxidx; + VectorType v1(v), v2(v); + v1(idx0) = v1(idx1); + v2(idx0) = v2(idx2); + v1.minCoeff(&eigen_minidx); + v2.maxCoeff(&eigen_maxidx); + VERIFY(eigen_minidx == (std::min)(idx0,idx1)); + VERIFY(eigen_maxidx == (std::min)(idx0,idx2)); +} + +void test_visitor() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( matrixVisitor(Matrix()) ); + CALL_SUBTEST_2( matrixVisitor(Matrix2f()) ); + CALL_SUBTEST_3( matrixVisitor(Matrix4d()) ); + CALL_SUBTEST_4( matrixVisitor(MatrixXd(8, 12)) ); + CALL_SUBTEST_5( matrixVisitor(Matrix(20, 20)) ); + CALL_SUBTEST_6( matrixVisitor(MatrixXi(8, 12)) ); + } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_7( vectorVisitor(Vector4f()) ); + CALL_SUBTEST_7( vectorVisitor(Matrix()) ); + CALL_SUBTEST_8( vectorVisitor(VectorXd(10)) ); + CALL_SUBTEST_9( vectorVisitor(RowVectorXd(10)) ); + CALL_SUBTEST_10( vectorVisitor(VectorXf(33)) ); + } +} diff --git a/thirdparty/eigen/test/zerosized.cpp b/thirdparty/eigen/test/zerosized.cpp new file mode 100644 index 000000000..477ff0070 --- /dev/null +++ b/thirdparty/eigen/test/zerosized.cpp @@ -0,0 +1,102 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + + +template void zeroReduction(const MatrixType& m) { + // Reductions that must hold for zero sized objects + VERIFY(m.all()); + VERIFY(!m.any()); + VERIFY(m.prod()==1); + VERIFY(m.sum()==0); + VERIFY(m.count()==0); + VERIFY(m.allFinite()); + VERIFY(!m.hasNaN()); +} + + +template void zeroSizedMatrix() +{ + MatrixType t1; + typedef typename MatrixType::Scalar Scalar; + + if (MatrixType::SizeAtCompileTime == Dynamic || MatrixType::SizeAtCompileTime == 0) + { + zeroReduction(t1); + if (MatrixType::RowsAtCompileTime == Dynamic) + VERIFY(t1.rows() == 0); + if (MatrixType::ColsAtCompileTime == Dynamic) + VERIFY(t1.cols() == 0); + + if (MatrixType::RowsAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == Dynamic) + { + + MatrixType t2(0, 0), t3(t1); + VERIFY(t2.rows() == 0); + VERIFY(t2.cols() == 0); + + zeroReduction(t2); + VERIFY(t1==t2); + } + } + + if(MatrixType::MaxColsAtCompileTime!=0 && MatrixType::MaxRowsAtCompileTime!=0) + { + Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random(1,10) : Index(MatrixType::RowsAtCompileTime); + Index cols = MatrixType::ColsAtCompileTime==Dynamic ? 
internal::random(1,10) : Index(MatrixType::ColsAtCompileTime); + MatrixType m(rows,cols); + zeroReduction(m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols)); + zeroReduction(m.template block(0,0,rows,0)); + zeroReduction(m.template block<0,1>(0,0)); + zeroReduction(m.template block<1,0>(0,0)); + Matrix prod = m.template block(0,0,rows,0) * m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols); + VERIFY(prod.rows()==rows && prod.cols()==cols); + VERIFY(prod.isZero()); + prod = m.template block<1,0>(0,0) * m.template block<0,1>(0,0); + VERIFY(prod.size()==1); + VERIFY(prod.isZero()); + } +} + +template void zeroSizedVector() +{ + VectorType t1; + + if (VectorType::SizeAtCompileTime == Dynamic || VectorType::SizeAtCompileTime==0) + { + zeroReduction(t1); + VERIFY(t1.size() == 0); + VectorType t2(DenseIndex(0)); // DenseIndex disambiguates with 0-the-null-pointer (error with gcc 4.4 and MSVC8) + VERIFY(t2.size() == 0); + zeroReduction(t2); + + VERIFY(t1==t2); + } +} + +void test_zerosized() +{ + zeroSizedMatrix(); + zeroSizedMatrix(); + zeroSizedMatrix >(); + zeroSizedMatrix(); + zeroSizedMatrix >(); + zeroSizedMatrix >(); + zeroSizedMatrix >(); + zeroSizedMatrix >(); + zeroSizedMatrix >(); + zeroSizedMatrix >(); + + zeroSizedVector(); + zeroSizedVector(); + zeroSizedVector(); + zeroSizedVector >(); + zeroSizedVector >(); +} diff --git a/thirdparty/eigen/unsupported/CMakeLists.txt b/thirdparty/eigen/unsupported/CMakeLists.txt new file mode 100644 index 000000000..4fef40a86 --- /dev/null +++ b/thirdparty/eigen/unsupported/CMakeLists.txt @@ -0,0 +1,7 @@ +add_subdirectory(Eigen) +add_subdirectory(doc EXCLUDE_FROM_ALL) +if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest +else() + add_subdirectory(test EXCLUDE_FROM_ALL) +endif() diff --git a/thirdparty/eigen/unsupported/Eigen/AdolcForward b/thirdparty/eigen/unsupported/Eigen/AdolcForward new file mode 100644 index 000000000..15f5f0731 --- /dev/null 
+++ b/thirdparty/eigen/unsupported/Eigen/AdolcForward @@ -0,0 +1,156 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ADLOC_FORWARD +#define EIGEN_ADLOC_FORWARD + +//-------------------------------------------------------------------------------- +// +// This file provides support for adolc's adouble type in forward mode. +// ADOL-C is a C++ automatic differentiation library, +// see https://projects.coin-or.org/ADOL-C for more information. +// +// Note that the maximal number of directions is controlled by +// the preprocessor token NUMBER_DIRECTIONS. The default is 2. +// +//-------------------------------------------------------------------------------- + +#define ADOLC_TAPELESS +#ifndef NUMBER_DIRECTIONS +# define NUMBER_DIRECTIONS 2 +#endif +#include + +// adolc defines some very stupid macros: +#if defined(malloc) +# undef malloc +#endif + +#if defined(calloc) +# undef calloc +#endif + +#if defined(realloc) +# undef realloc +#endif + +#include + +namespace Eigen { + +/** + * \defgroup AdolcForward_Module Adolc forward module + * This module provides support for adolc's adouble type in forward mode. + * ADOL-C is a C++ automatic differentiation library, + * see https://projects.coin-or.org/ADOL-C for more information. + * It mainly consists in: + * - a struct Eigen::NumTraits specialization + * - overloads of internal::* math function for adtl::adouble type. + * + * Note that the maximal number of directions is controlled by + * the preprocessor token NUMBER_DIRECTIONS. The default is 2. 
+ * + * \code + * #include + * \endcode + */ + //@{ + +} // namespace Eigen + +// Eigen requires a few additional functions, which must be defined in the same namespace +// as the custom scalar type itself +namespace adtl { + +inline const adouble& conj(const adouble& x) { return x; } +inline const adouble& real(const adouble& x) { return x; } +inline adouble imag(const adouble&) { return 0.; } +inline adouble abs(const adouble& x) { return fabs(x); } +inline adouble abs2(const adouble& x) { return x*x; } + +} + +namespace Eigen { + +template<> struct NumTraits + : NumTraits +{ + typedef adtl::adouble Real; + typedef adtl::adouble NonInteger; + typedef adtl::adouble Nested; + enum { + IsComplex = 0, + IsInteger = 0, + IsSigned = 1, + RequireInitialization = 1, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; +}; + +template class AdolcForwardJacobian : public Functor +{ + typedef adtl::adouble ActiveScalar; +public: + + AdolcForwardJacobian() : Functor() {} + AdolcForwardJacobian(const Functor& f) : Functor(f) {} + + // forward constructors + template + AdolcForwardJacobian(const T0& a0) : Functor(a0) {} + template + AdolcForwardJacobian(const T0& a0, const T1& a1) : Functor(a0, a1) {} + template + AdolcForwardJacobian(const T0& a0, const T1& a1, const T1& a2) : Functor(a0, a1, a2) {} + + typedef typename Functor::InputType InputType; + typedef typename Functor::ValueType ValueType; + typedef typename Functor::JacobianType JacobianType; + + typedef Matrix ActiveInput; + typedef Matrix ActiveValue; + + void operator() (const InputType& x, ValueType* v, JacobianType* _jac) const + { + eigen_assert(v!=0); + if (!_jac) + { + Functor::operator()(x, v); + return; + } + + JacobianType& jac = *_jac; + + ActiveInput ax = x.template cast(); + ActiveValue av(jac.rows()); + + for (int j=0; j +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ALIGNED_VECTOR3 +#define EIGEN_ALIGNED_VECTOR3 + +#include + +namespace Eigen { + +/** + * \defgroup AlignedVector3_Module Aligned vector3 module + * + * \code + * #include + * \endcode + */ + //@{ + + +/** \class AlignedVector3 + * + * \brief A vectorization friendly 3D vector + * + * This class represents a 3D vector internally using a 4D vector + * such that vectorization can be seamlessly enabled. Of course, + * the same result can be achieved by directly using a 4D vector. + * This class makes this process simpler. + * + */ +// TODO specialize Cwise +template class AlignedVector3; + +namespace internal { +template struct traits > + : traits > +{ +}; +} + +template class AlignedVector3 + : public MatrixBase > +{ + typedef Matrix<_Scalar,4,1> CoeffType; + CoeffType m_coeffs; + public: + + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(AlignedVector3) + using Base::operator*; + + inline Index rows() const { return 3; } + inline Index cols() const { return 1; } + + Scalar* data() { return m_coeffs.data(); } + const Scalar* data() const { return m_coeffs.data(); } + Index innerStride() const { return 1; } + Index outerStride() const { return 3; } + + inline const Scalar& coeff(Index row, Index col) const + { return m_coeffs.coeff(row, col); } + + inline Scalar& coeffRef(Index row, Index col) + { return m_coeffs.coeffRef(row, col); } + + inline const Scalar& coeff(Index index) const + { return m_coeffs.coeff(index); } + + inline Scalar& coeffRef(Index index) + { return m_coeffs.coeffRef(index);} + + + inline AlignedVector3(const Scalar& x, const Scalar& y, const Scalar& z) + : m_coeffs(x, y, z, Scalar(0)) + {} + + inline AlignedVector3(const AlignedVector3& other) + : Base(), m_coeffs(other.m_coeffs) + {} + + template + struct generic_assign_selector {}; + + template struct generic_assign_selector + { + inline static void 
run(AlignedVector3& dest, const XprType& src) + { + dest.m_coeffs = src; + } + }; + + template struct generic_assign_selector + { + inline static void run(AlignedVector3& dest, const XprType& src) + { + dest.m_coeffs.template head<3>() = src; + dest.m_coeffs.w() = Scalar(0); + } + }; + + template + inline AlignedVector3(const MatrixBase& other) + { + generic_assign_selector::run(*this,other.derived()); + } + + inline AlignedVector3& operator=(const AlignedVector3& other) + { m_coeffs = other.m_coeffs; return *this; } + + template + inline AlignedVector3& operator=(const MatrixBase& other) + { + generic_assign_selector::run(*this,other.derived()); + return *this; + } + + inline AlignedVector3 operator+(const AlignedVector3& other) const + { return AlignedVector3(m_coeffs + other.m_coeffs); } + + inline AlignedVector3& operator+=(const AlignedVector3& other) + { m_coeffs += other.m_coeffs; return *this; } + + inline AlignedVector3 operator-(const AlignedVector3& other) const + { return AlignedVector3(m_coeffs - other.m_coeffs); } + + inline AlignedVector3 operator-=(const AlignedVector3& other) + { m_coeffs -= other.m_coeffs; return *this; } + + inline AlignedVector3 operator*(const Scalar& s) const + { return AlignedVector3(m_coeffs * s); } + + inline friend AlignedVector3 operator*(const Scalar& s,const AlignedVector3& vec) + { return AlignedVector3(s * vec.m_coeffs); } + + inline AlignedVector3& operator*=(const Scalar& s) + { m_coeffs *= s; return *this; } + + inline AlignedVector3 operator/(const Scalar& s) const + { return AlignedVector3(m_coeffs / s); } + + inline AlignedVector3& operator/=(const Scalar& s) + { m_coeffs /= s; return *this; } + + inline Scalar dot(const AlignedVector3& other) const + { + eigen_assert(m_coeffs.w()==Scalar(0)); + eigen_assert(other.m_coeffs.w()==Scalar(0)); + return m_coeffs.dot(other.m_coeffs); + } + + inline void normalize() + { + m_coeffs /= norm(); + } + + inline AlignedVector3 normalized() const + { + return 
AlignedVector3(m_coeffs / norm()); + } + + inline Scalar sum() const + { + eigen_assert(m_coeffs.w()==Scalar(0)); + return m_coeffs.sum(); + } + + inline Scalar squaredNorm() const + { + eigen_assert(m_coeffs.w()==Scalar(0)); + return m_coeffs.squaredNorm(); + } + + inline Scalar norm() const + { + using std::sqrt; + return sqrt(squaredNorm()); + } + + inline AlignedVector3 cross(const AlignedVector3& other) const + { + return AlignedVector3(m_coeffs.cross3(other.m_coeffs)); + } + + template + inline bool isApprox(const MatrixBase& other, const RealScalar& eps=NumTraits::dummy_precision()) const + { + return m_coeffs.template head<3>().isApprox(other,eps); + } + + CoeffType& coeffs() { return m_coeffs; } + const CoeffType& coeffs() const { return m_coeffs; } +}; + +namespace internal { + +template +struct eval, Dense> +{ + typedef const AlignedVector3<_Scalar>& type; +}; + +template +struct evaluator > + : evaluator > +{ + typedef AlignedVector3 XprType; + typedef evaluator > Base; + + evaluator(const XprType &m) : Base(m.coeffs()) {} +}; + +} + +//@} + +} + +#endif // EIGEN_ALIGNED_VECTOR3 diff --git a/thirdparty/eigen/unsupported/Eigen/ArpackSupport b/thirdparty/eigen/unsupported/Eigen/ArpackSupport new file mode 100644 index 000000000..37a2799ef --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/ArpackSupport @@ -0,0 +1,31 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARPACKSUPPORT_MODULE_H +#define EIGEN_ARPACKSUPPORT_MODULE_H + +#include + +#include + +/** \defgroup ArpackSupport_Module Arpack support module + * + * This module provides a wrapper to Arpack, a library for sparse eigenvalue decomposition. 
+ * + * \code + * #include + * \endcode + */ + +#include +#include "src/Eigenvalues/ArpackSelfAdjointEigenSolver.h" + +#include + +#endif // EIGEN_ARPACKSUPPORT_MODULE_H +/* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/thirdparty/eigen/unsupported/Eigen/AutoDiff b/thirdparty/eigen/unsupported/Eigen/AutoDiff new file mode 100644 index 000000000..abf5b7d67 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/AutoDiff @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_AUTODIFF_MODULE +#define EIGEN_AUTODIFF_MODULE + +namespace Eigen { + +/** + * \defgroup AutoDiff_Module Auto Diff module + * + * This module features forward automatic differentiation via a simple + * templated scalar type wrapper AutoDiffScalar. + * + * Warning : this should NOT be confused with numerical differentiation, which + * is a different method and has its own module in Eigen : \ref NumericalDiff_Module. + * + * \code + * #include + * \endcode + */ +//@{ + +} + +#include "src/AutoDiff/AutoDiffScalar.h" +// #include "src/AutoDiff/AutoDiffVector.h" +#include "src/AutoDiff/AutoDiffJacobian.h" + +namespace Eigen { +//@} +} + +#endif // EIGEN_AUTODIFF_MODULE diff --git a/thirdparty/eigen/unsupported/Eigen/BVH b/thirdparty/eigen/unsupported/Eigen/BVH new file mode 100644 index 000000000..0161a5402 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/BVH @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Ilya Baran +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_BVH_MODULE_H +#define EIGEN_BVH_MODULE_H + +#include +#include +#include +#include +#include + +namespace Eigen { + +/** + * \defgroup BVH_Module BVH module + * \brief This module provides generic bounding volume hierarchy algorithms + * and reference tree implementations. + * + * + * \code + * #include + * \endcode + * + * A bounding volume hierarchy (BVH) can accelerate many geometric queries. This module provides a generic implementation + * of the two basic algorithms over a BVH: intersection of a query object against all objects in the hierarchy and minimization + * of a function over the objects in the hierarchy. It also provides intersection and minimization over a cartesian product of + * two BVH's. A BVH accelerates intersection by using the fact that if a query object does not intersect a volume, then it cannot + * intersect any object contained in that volume. Similarly, a BVH accelerates minimization because the minimum of a function + * over a volume is no greater than the minimum of a function over any object contained in it. 
+ * + * Some sample queries that can be written in terms of intersection are: + * - Determine all points where a ray intersects a triangle mesh + * - Given a set of points, determine which are contained in a query sphere + * - Given a set of spheres, determine which contain the query point + * - Given a set of disks, determine if any is completely contained in a query rectangle (represent each 2D disk as a point \f$(x,y,r)\f$ + * in 3D and represent the rectangle as a pyramid based on the original rectangle and shrinking in the \f$r\f$ direction) + * - Given a set of points, count how many pairs are \f$d\pm\epsilon\f$ apart (done by looking at the cartesian product of the set + * of points with itself) + * + * Some sample queries that can be written in terms of function minimization over a set of objects are: + * - Find the intersection between a ray and a triangle mesh closest to the ray origin (function is infinite off the ray) + * - Given a polyline and a query point, determine the closest point on the polyline to the query + * - Find the diameter of a point cloud (done by looking at the cartesian product and using negative distance as the function) + * - Determine how far two meshes are from colliding (this is also a cartesian product query) + * + * This implementation decouples the basic algorithms both from the type of hierarchy (and the types of the bounding volumes) and + * from the particulars of the query. To enable abstraction from the BVH, the BVH is required to implement a generic mechanism + * for traversal. To abstract from the query, the query is responsible for keeping track of results. 
+ * + * To be used in the algorithms, a hierarchy must implement the following traversal mechanism (see KdBVH for a sample implementation): \code + typedef Volume //the type of bounding volume + typedef Object //the type of object in the hierarchy + typedef Index //a reference to a node in the hierarchy--typically an int or a pointer + typedef VolumeIterator //an iterator type over node children--returns Index + typedef ObjectIterator //an iterator over object (leaf) children--returns const Object & + Index getRootIndex() const //returns the index of the hierarchy root + const Volume &getVolume(Index index) const //returns the bounding volume of the node at given index + void getChildren(Index index, VolumeIterator &outVBegin, VolumeIterator &outVEnd, + ObjectIterator &outOBegin, ObjectIterator &outOEnd) const + //getChildren takes a node index and makes [outVBegin, outVEnd) range over its node children + //and [outOBegin, outOEnd) range over its object children + \endcode + * + * To use the hierarchy, call BVIntersect or BVMinimize, passing it a BVH (or two, for cartesian product) and a minimizer or intersector. + * For an intersection query on a single BVH, the intersector encapsulates the query and must provide two functions: + * \code + bool intersectVolume(const Volume &volume) //returns true if the query intersects the volume + bool intersectObject(const Object &object) //returns true if the intersection search should terminate immediately + \endcode + * The guarantee that BVIntersect provides is that intersectObject will be called on every object whose bounding volume + * intersects the query (but possibly on other objects too) unless the search is terminated prematurely. It is the + * responsibility of the intersectObject function to keep track of the results in whatever manner is appropriate. + * The cartesian product intersection and the BVMinimize queries are similar--see their individual documentation. 
+ * + * The following is a simple but complete example for how to use the BVH to accelerate the search for a closest red-blue point pair: + * \include BVH_Example.cpp + * Output: \verbinclude BVH_Example.out + */ +} + +//@{ + +#include "src/BVH/BVAlgorithms.h" +#include "src/BVH/KdBVH.h" + +//@} + +#endif // EIGEN_BVH_MODULE_H diff --git a/thirdparty/eigen/unsupported/Eigen/CMakeLists.txt b/thirdparty/eigen/unsupported/Eigen/CMakeLists.txt new file mode 100644 index 000000000..631a06014 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CMakeLists.txt @@ -0,0 +1,32 @@ +set(Eigen_HEADERS + AdolcForward + AlignedVector3 + ArpackSupport + AutoDiff + BVH + EulerAngles + FFT + IterativeSolvers + KroneckerProduct + LevenbergMarquardt + MatrixFunctions + MoreVectorization + MPRealSupport + NonLinearOptimization + NumericalDiff + OpenGLSupport + Polynomials + Skyline + SparseExtra + SpecialFunctions + Splines + ) + +install(FILES + ${Eigen_HEADERS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") + +add_subdirectory(CXX11) diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/CMakeLists.txt b/thirdparty/eigen/unsupported/Eigen/CXX11/CMakeLists.txt new file mode 100644 index 000000000..385ed240c --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/CMakeLists.txt @@ -0,0 +1,8 @@ +set(Eigen_CXX11_HEADERS Tensor TensorSymmetry ThreadPool) + +install(FILES + ${Eigen_CXX11_HEADERS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/Tensor b/thirdparty/eigen/unsupported/Eigen/CXX11/Tensor new file mode 100644 index 000000000..8b36093f0 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/Tensor @@ -0,0 
+1,153 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//#ifndef EIGEN_CXX11_TENSOR_MODULE +//#define EIGEN_CXX11_TENSOR_MODULE + +#include "../../../Eigen/Core" + +#if defined(EIGEN_USE_SYCL) +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite +#include +#include +#include +#include +#include +#endif + +#include + +#include "../SpecialFunctions" +#include "src/util/CXX11Meta.h" +#include "src/util/MaxSizeVector.h" + +/** \defgroup CXX11_Tensor_Module Tensor Module + * + * This module provides a Tensor class for storing arbitrarily indexed + * objects. + * + * \code + * #include + * \endcode + */ + +#include +#include +#include + +#ifdef _WIN32 +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif + +#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 +#include +#endif + +#ifdef _WIN32 +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +#ifdef EIGEN_USE_THREADS +#include "ThreadPool" +#endif + +#ifdef EIGEN_USE_GPU +#include +#include +#if __cplusplus >= 201103L +#include +#include +#endif +#endif + +#include "src/Tensor/TensorMacros.h" +#include "src/Tensor/TensorForwardDeclarations.h" +#include "src/Tensor/TensorMeta.h" +#include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorCostModel.h" +#include "src/Tensor/TensorDeviceDefault.h" +#include "src/Tensor/TensorDeviceThreadPool.h" +#include "src/Tensor/TensorDeviceCuda.h" +#include "src/Tensor/TensorDeviceSycl.h" +#include "src/Tensor/TensorIndexList.h" +#include 
"src/Tensor/TensorDimensionList.h" +#include "src/Tensor/TensorDimensions.h" +#include "src/Tensor/TensorInitializer.h" +#include "src/Tensor/TensorTraits.h" +#include "src/Tensor/TensorRandom.h" +#include "src/Tensor/TensorUInt128.h" +#include "src/Tensor/TensorIntDiv.h" +#include "src/Tensor/TensorGlobalFunctions.h" + +#include "src/Tensor/TensorBase.h" + +#include "src/Tensor/TensorEvaluator.h" +#include "src/Tensor/TensorExpr.h" +#include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorReductionCuda.h" +#include "src/Tensor/TensorArgMax.h" +#include "src/Tensor/TensorConcatenation.h" +#include "src/Tensor/TensorContractionMapper.h" +#include "src/Tensor/TensorContractionBlocking.h" +#include "src/Tensor/TensorContraction.h" +#include "src/Tensor/TensorContractionThreadPool.h" +#include "src/Tensor/TensorContractionCuda.h" +#include "src/Tensor/TensorConversion.h" +#include "src/Tensor/TensorConvolution.h" +#include "src/Tensor/TensorFFT.h" +#include "src/Tensor/TensorPatch.h" +#include "src/Tensor/TensorImagePatch.h" +#include "src/Tensor/TensorVolumePatch.h" +#include "src/Tensor/TensorBroadcasting.h" +#include "src/Tensor/TensorChipping.h" +#include "src/Tensor/TensorInflation.h" +#include "src/Tensor/TensorLayoutSwap.h" +#include "src/Tensor/TensorMorphing.h" +#include "src/Tensor/TensorPadding.h" +#include "src/Tensor/TensorReverse.h" +#include "src/Tensor/TensorShuffling.h" +#include "src/Tensor/TensorStriding.h" +#include "src/Tensor/TensorCustomOp.h" +#include "src/Tensor/TensorEvalTo.h" +#include "src/Tensor/TensorForcedEval.h" +#include "src/Tensor/TensorGenerator.h" +#include "src/Tensor/TensorAssign.h" +#include "src/Tensor/TensorScan.h" + +#include "src/Tensor/TensorSycl.h" +#include "src/Tensor/TensorExecutor.h" +#include "src/Tensor/TensorDevice.h" + +#include "src/Tensor/TensorStorage.h" +#include "src/Tensor/Tensor.h" +#include "src/Tensor/TensorFixedSize.h" +#include "src/Tensor/TensorMap.h" +#include "src/Tensor/TensorRef.h" + 
+#include "src/Tensor/TensorIO.h" + +#include + +//#endif // EIGEN_CXX11_TENSOR_MODULE diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/TensorSymmetry b/thirdparty/eigen/unsupported/Eigen/CXX11/TensorSymmetry new file mode 100644 index 000000000..fb1b0c0fb --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/TensorSymmetry @@ -0,0 +1,42 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE +#define EIGEN_CXX11_TENSORSYMMETRY_MODULE + +#include + +#include + +#include "src/util/CXX11Meta.h" + +/** \defgroup CXX11_TensorSymmetry_Module Tensor Symmetry Module + * + * This module provides classes that allow for the definition of + * symmetries w.r.t. tensor indices. + * + * Including this module will implicitly include the Tensor module. + * + * \code + * #include + * \endcode + */ + +#include "src/TensorSymmetry/util/TemplateGroupTheory.h" +#include "src/TensorSymmetry/Symmetry.h" +#include "src/TensorSymmetry/StaticSymmetry.h" +#include "src/TensorSymmetry/DynamicSymmetry.h" + +#include + +#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/ThreadPool b/thirdparty/eigen/unsupported/Eigen/CXX11/ThreadPool new file mode 100644 index 000000000..09d637e9a --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/ThreadPool @@ -0,0 +1,65 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0.
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_MODULE +#define EIGEN_CXX11_THREADPOOL_MODULE + +#include "../../../Eigen/Core" + +#include + +/** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module + * + * This module provides 2 threadpool implementations + * - a simple reference implementation + * - a faster non blocking implementation + * + * This module requires C++11. + * + * \code + * #include + * \endcode + */ + + +// The code depends on CXX11, so only include the module if the +// compiler supports it. +#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/util/CXX11Meta.h" +#include "src/util/MaxSizeVector.h" + +#include "src/ThreadPool/ThreadLocal.h" +#include "src/ThreadPool/ThreadYield.h" +#include "src/ThreadPool/EventCount.h" +#include "src/ThreadPool/RunQueue.h" +#include "src/ThreadPool/ThreadPoolInterface.h" +#include "src/ThreadPool/ThreadEnvironment.h" +#include "src/ThreadPool/SimpleThreadPool.h" +#include "src/ThreadPool/NonBlockingThreadPool.h" + +#endif + +#include + +#endif // EIGEN_CXX11_THREADPOOL_MODULE + diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md new file mode 100644 index 000000000..02146527b --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -0,0 +1,1757 @@ +# Eigen Tensors + +Tensors are multidimensional arrays of elements. Elements are typically scalars, +but more complex types such as strings are also supported. + +[TOC] + +## Tensor Classes + +You can manipulate a tensor with one of the following classes. They all are in +the namespace ```::Eigen.``` + + +### Class Tensor + +This is the class to use to create a tensor and allocate memory for it. 
The +class is templatized with the tensor datatype, such as float or int, and the +tensor rank. The rank is the number of dimensions, for example rank 2 is a +matrix. + +Tensors of this class are resizable. For example, if you assign a tensor of a +different size to a Tensor, that tensor is resized to match its new value. + +#### Constructor Tensor(size0, size1, ...) + +Constructor for a Tensor. The constructor must be passed ```rank``` integers +indicating the sizes of the instance along each of the ```rank``` +dimensions. + + // Create a tensor of rank 3 of sizes 2, 3, 4. This tensor owns + // memory to hold 24 floating point values (24 = 2 x 3 x 4). + Tensor t_3d(2, 3, 4); + + // Resize t_3d by assigning a tensor of different sizes, but same rank. + t_3d = Tensor(3, 4, 3); + +#### Constructor Tensor(size_array) + +Constructor where the sizes for the constructor are specified as an array of +values instead of an explicit list of parameters. The array type to use is +```Eigen::array```. The array can be constructed automatically +from an initializer list. + + // Create a tensor of strings of rank 2 with sizes 5, 7. + Tensor t_2d({5, 7}); + + +### Class TensorFixedSize> + +Class to use for tensors of fixed size, where the size is known at compile +time. Fixed sized tensors can provide very fast computations because all their +dimensions are known by the compiler. FixedSize tensors are not resizable. + +If the total number of elements in a fixed size tensor is small enough the +tensor data is held onto the stack and does not cause heap allocation and free. + + // Create a 4 x 3 tensor of floats. + TensorFixedSize> t_4x3; + +### Class TensorMap> + +This is the class to use to create a tensor on top of memory allocated and +owned by another part of your code. It allows you to view any piece of allocated +memory as a Tensor. Instances of this class do not own the memory where the +data are stored.
+ +A TensorMap is not resizable because it does not own the memory where its data +are stored. + +#### Constructor TensorMap>(data, size0, size1, ...) + +Constructor for a Tensor. The constructor must be passed a pointer to the +storage for the data, and "rank" size attributes. The storage has to be +large enough to hold all the data. + + // Map a tensor of ints on top of stack-allocated storage. + int storage[128]; // 2 x 4 x 2 x 8 = 128 + TensorMap t_4d(storage, 2, 4, 2, 8); + + // The same storage can be viewed as a different tensor. + // You can also pass the sizes as an array. + TensorMap t_2d(storage, 16, 8); + + // You can also map fixed-size tensors. Here we get a 1d view of + // the 2d fixed-size tensor. + Tensor> t_4x3; + TensorMap t_12(t_4x3, 12); + + +#### Class TensorRef + +See Assigning to a TensorRef below. + +## Accessing Tensor Elements + +#### tensor(index0, index1...) + +Return the element at position ```(index0, index1...)``` in tensor +```tensor```. You must pass as many parameters as the rank of ```tensor```. +The expression can be used as an l-value to set the value of the element at the +specified position. The value returned is of the datatype of the tensor. + + // Set the value of the element at position (0, 1, 0); + Tensor t_3d(2, 3, 4); + t_3d(0, 1, 0) = 12.0f; + + // Initialize all elements to random values. + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 4; ++k) { + t_3d(i, j, k) = ...some random value...; + } + } + } + + // Print elements of a tensor. + for (int i = 0; i < 2; ++i) { + LOG(INFO) << t_3d(i, 0, 0); + } + + +## TensorLayout + +The tensor library supports 2 layouts: ```ColMajor``` (the default) and +```RowMajor```. Only the default column major layout is currently fully +supported, and it is therefore not recommended to attempt to use the row major +layout at the moment. + +The layout of a tensor is optionally specified as part of its type. 
If not +specified explicitly column major is assumed. + + Tensor col_major; // equivalent to Tensor + TensorMap > row_major(data, ...); + +All the arguments to an expression must use the same layout. Attempting to mix +different layouts will result in a compilation error. + +It is possible to change the layout of a tensor or an expression using the +```swap_layout()``` method. Note that this will also reverse the order of the +dimensions. + + Tensor col_major(2, 4); + Tensor row_major(2, 4); + + Tensor col_major_result = col_major; // ok, layouts match + Tensor col_major_result = row_major; // will not compile + + // Simple layout swap + col_major_result = row_major.swap_layout(); + eigen_assert(col_major_result.dimension(0) == 4); + eigen_assert(col_major_result.dimension(1) == 2); + + // Swap the layout and preserve the order of the dimensions + array shuffle(1, 0); + col_major_result = row_major.swap_layout().shuffle(shuffle); + eigen_assert(col_major_result.dimension(0) == 2); + eigen_assert(col_major_result.dimension(1) == 4); + + +## Tensor Operations + +The Eigen Tensor library provides a vast library of operations on Tensors: +numerical operations such as addition and multiplication, geometry operations +such as slicing and shuffling, etc. These operations are available as methods +of the Tensor classes, and in some cases as operator overloads. For example +the following code computes the elementwise addition of two tensors: + + Tensor t1(2, 3, 4); + ...set some values in t1... + Tensor t2(2, 3, 4); + ...set some values in t2... + // Set t3 to the element wise sum of t1 and t2 + Tensor t3 = t1 + t2; + +While the code above looks easy enough, it is important to understand that the +expression ```t1 + t2``` is not actually adding the values of the tensors. The +expression instead constructs a "tensor operator" object of the class +TensorCwiseBinaryOp, which has references to the tensors +```t1``` and ```t2```. 
This is a small C++ object that knows how to add +```t1``` and ```t2```. It is only when the value of the expression is assigned +to the tensor ```t3``` that the addition is actually performed. Technically, +this happens through the overloading of ```operator=()``` in the Tensor class. + +This mechanism for computing tensor expressions allows for lazy evaluation and +optimizations which are what make the tensor library very fast. + +Of course, the tensor operators do nest, and the expression ```t1 + t2 * +0.3f``` is actually represented with the (approximate) tree of operators: + + TensorCwiseBinaryOp(t1, TensorCwiseUnaryOp(t2, 0.3f)) + + +### Tensor Operations and C++ "auto" + +Because Tensor operations create tensor operators, the C++ ```auto``` keyword +does not have its intuitive meaning. Consider these 2 lines of code: + + Tensor t3 = t1 + t2; + auto t4 = t1 + t2; + +In the first line we allocate the tensor ```t3``` and it will contain the +result of the addition of ```t1``` and ```t2```. In the second line, ```t4``` +is actually the tree of tensor operators that will compute the addition of +```t1``` and ```t2```. In fact, ```t4``` is *not* a tensor and you cannot get +the values of its elements: + + Tensor t3 = t1 + t2; + cout << t3(0, 0, 0); // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0) + + auto t4 = t1 + t2; + cout << t4(0, 0, 0); // Compilation error! + +When you use ```auto``` you do not get a Tensor as a result but instead a +non-evaluated expression. So only use ```auto``` to delay evaluation. + +Unfortunately, there is no single underlying concrete type for holding +non-evaluated expressions, hence you have to use auto in the case when you do +want to hold non-evaluated expressions. + +When you need the results of a set of tensor computations you have to assign the +result to a Tensor that will be capable of holding onto them. This can be +either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing +piece of memory.
All the following will work: + + auto t4 = t1 + t2; + + Tensor result = t4; // Could also be: result(t4); + cout << result(0, 0, 0); + + TensorMap result(, , ...) = t4; + cout << result(0, 0, 0); + + TensorFixedSize> result = t4; + cout << result(0, 0, 0); + +Until you need the results, you can keep the operation around, and even reuse +it for additional operations. As long as you keep the expression as an +operation, no computation is performed. + + // One way to compute exp((t1 + t2) * 0.2f); + auto t3 = t1 + t2; + auto t4 = t3 * 0.2f; + auto t5 = t4.exp(); + Tensor result = t5; + + // Another way, exactly as efficient as the previous one: + Tensor result = ((t1 + t2) * 0.2f).exp(); + +### Controlling When Expressions are Evaluated + +There are several ways to control when expressions are evaluated: + +* Assignment to a Tensor, TensorFixedSize, or TensorMap. +* Use of the eval() method. +* Assignment to a TensorRef. + +#### Assigning to a Tensor, TensorFixedSize, or TensorMap. + +The most common way to evaluate an expression is to assign it to a Tensor. In +the example below, the ```auto``` declarations make the intermediate values +"Operations", not Tensors, and do not cause the expressions to be evaluated. +The assignment to the Tensor ```result``` causes the evaluation of all the +operations. + + auto t3 = t1 + t2; // t3 is an Operation. + auto t4 = t3 * 0.2f; // t4 is an Operation. + auto t5 = t4.exp(); // t5 is an Operation. + Tensor result = t5; // The operations are evaluated. + +If you know the ranks and sizes of the Operation value you can assign the +Operation to a TensorFixedSize instead of a Tensor, which is a bit more +efficient. + + // We know that the result is a 4x4x2 tensor! + TensorFixedSize result = t5; + +Similarly, assigning an expression to a TensorMap causes its evaluation. Like +tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to +have the rank and sizes of the expression that are assigned to them.
+ +#### Calling eval(). + +When you compute large composite expressions, you sometimes want to tell Eigen +that an intermediate value in the expression tree is worth evaluating ahead of +time. This is done by inserting a call to the ```eval()``` method of the +expression Operation. + + // The previous example could have been written: + Tensor result = ((t1 + t2) * 0.2f).exp(); + + // If you want to compute (t1 + t2) once ahead of time you can write: + Tensor result = ((t1 + t2).eval() * 0.2f).exp(); + +Semantically, calling ```eval()``` is equivalent to materializing the value of +the expression in a temporary Tensor of the right size. The code above in +effect does: + + // .eval() knows the size! + TensorFixedSize tmp = t1 + t2; + Tensor result = (tmp * 0.2f).exp(); + +Note that the return value of ```eval()``` is itself an Operation, so the +following code does not do what you may think: + + // Here t3 is an evaluation Operation. t3 has not been evaluated yet. + auto t3 = (t1 + t2).eval(); + + // You can use t3 in another expression. Still no evaluation. + auto t4 = (t3 * 0.2f).exp(); + + // The value is evaluated when you assign the Operation to a Tensor, using + // an intermediate tensor to represent t3.x + Tensor result = t4; + +While in the examples above calling ```eval()``` does not make a difference in +performance, in other cases it can make a huge difference. 
In the expression +below the ```broadcast()``` expression causes the ```X.maximum()``` expression +to be evaluated many times: + + Tensor<...> X ...; + Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast)) + * beta).exp(); + +Inserting a call to ```eval()``` between the ```maximum()``` and +```reshape()``` calls guarantees that maximum() is only computed once and +greatly speeds up execution: + + Tensor<...> Y = + ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) + * beta).exp(); + +In the other example below, the tensor ```Y``` is both used in the expression +and its assignment. This is an aliasing problem and if the evaluation is not +done in the right order Y will be updated incrementally during the evaluation +resulting in bogus results: + + Tensor<...> Y ...; + Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast)); + +Inserting a call to ```eval()``` between the ```sum()``` and ```reshape()``` +expressions ensures that the sum is computed before any updates to ```Y``` are +done. + + Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + +Note that an eval around the full right hand side expression is not needed +because the generated code has to compute the i-th value of the right hand side +before assigning it to the left hand side. + +However, if you were assigning the expression value to a shuffle of ```Y``` +then you would need to force an eval for correctness by adding an ```eval()``` +call for the right hand side: + + Y.shuffle(...) = + (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval(); + + +#### Assigning to a TensorRef. + +If you need to access only a few elements from the value of an expression you +can avoid materializing the value in a full tensor by using a TensorRef. + +A TensorRef is a small wrapper class for any Eigen Operation. It provides +overloads for the ```()``` operator that let you access individual values in +the expression.
TensorRef is convenient, because the Operations themselves do +not provide a way to access individual elements. + + // Create a TensorRef for the expression. The expression is not + // evaluated yet. + TensorRef > ref = ((t1 + t2) * 0.2f).exp(); + + // Use "ref" to access individual elements. The expression is evaluated + // on the fly. + float at_0 = ref(0, 0, 0); + cout << ref(0, 1, 0); + +Only use TensorRef when you need a subset of the values of the expression. +TensorRef only computes the values you access. However note that if you are +going to access all the values it will be much faster to materialize the +results in a Tensor first. + +In some cases, if the full Tensor result would be very large, you may save +memory by accessing it as a TensorRef. But not always. So don't count on it. + + +### Controlling How Expressions Are Evaluated + +The tensor library provides several implementations of the various operations +such as contractions and convolutions. The implementations are optimized for +different environments: single threaded on CPU, multi threaded on CPU, or on a +GPU using cuda. Additional implementations may be added later. + +You can choose which implementation to use with the ```device()``` call. If +you do not choose an implementation explicitly the default implementation that +uses a single thread on the CPU is used. + +The default implementation has been optimized for recent Intel CPUs, taking +advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the +library on ARM CPUs. Note that you need to pass compiler-dependent flags +to enable the use of SSE, AVX, and other instructions. + +For example, the following code adds two tensors using the default +single-threaded CPU implementation: + + Tensor a(30, 40); + Tensor b(30, 40); + Tensor c = a + b; + +To choose a different implementation you have to insert a ```device()``` call +before the assignment of the result.
For technical C++ reasons this requires +that the Tensor for the result be declared on its own. This means that you +have to know the size of the result. + + Eigen::Tensor c(30, 40); + c.device(...) = a + b; + +The call to ```device()``` must be the last call on the left of the operator=. + +You must pass to the ```device()``` call an Eigen device object. There are +presently three devices you can use: DefaultDevice, ThreadPoolDevice and +GpuDevice. + + +#### Evaluating With the DefaultDevice + +This is exactly the same as not inserting a ```device()``` call. + + DefaultDevice my_device; + c.device(my_device) = a + b; + +#### Evaluating with a Thread Pool + + // Create the Eigen ThreadPoolDevice. + Eigen::ThreadPoolDevice my_device(4 /* number of threads to use */); + + // Now just use the device when evaluating expressions. + Eigen::Tensor c(30, 50); + c.device(my_device) = a.contract(b, dot_product_dims); + + +#### Evaluating On GPU + +This is presently a bit more complicated than just using a thread pool device. +You need to create a GPU device but you also need to explicitly allocate the +memory for tensors with cuda. + + +## API Reference + +### Datatypes + +In the documentation of the tensor methods and Operation we mention datatypes +that are tensor-type specific: + +#### ::Dimensions + +Acts like an array of ints. Has an ```int size``` attribute, and can be +indexed like an array to access individual values. Used to represent the +dimensions of a tensor. See ```dimensions()```. + +#### ::Index + +Acts like an ```int```. Used for indexing tensors along their dimensions. See +```operator()```, ```dimension()```, and ```size()```. + +#### ::Scalar + +Represents the datatype of individual tensor elements. For example, for a +```Tensor```, ```Scalar``` is the type ```float```. See +```setConstant()```. + +#### + +We use this pseudo type to indicate that a tensor Operation is returned by a +method. 
We indicate in the text the type and dimensions of the tensor that the +Operation returns after evaluation. + +The Operation will have to be evaluated, for example by assigning it to a +tensor, before you can access the values of the resulting tensor. You can also +access the values through a TensorRef. + + +## Built-in Tensor Methods + +These are usual C++ methods that act on tensors immediately. They are not +Operations which provide delayed evaluation of their results. Unless specified +otherwise, all the methods listed below are available on all tensor classes: +Tensor, TensorFixedSize, and TensorMap. + +## Metadata + +### int NumDimensions + +Constant value indicating the number of dimensions of a Tensor. This is also +known as the tensor "rank". + + Eigen::Tensor a(3, 4); + cout << "Dims " << a.NumDimensions; + => Dims 2 + +### Dimensions dimensions() + +Returns an array-like object representing the dimensions of the tensor. +The actual type of the dimensions() result is ::Dimensions. + + Eigen::Tensor a(3, 4); + const Eigen::Tensor::Dimensions& d = a.dimensions(); + cout << "Dim size: " << d.size << ", dim 0: " << d[0] + << ", dim 1: " << d[1]; + => Dim size: 2, dim 0: 3, dim 1: 4 + +If you use a C++11 compiler, you can use ```auto``` to simplify the code: + + const auto& d = a.dimensions(); + cout << "Dim size: " << d.size << ", dim 0: " << d[0] + << ", dim 1: " << d[1]; + => Dim size: 2, dim 0: 3, dim 1: 4 + +### Index dimension(Index n) + +Returns the n-th dimension of the tensor. The actual type of the +```dimension()``` result is ```::Index```, but you can +always use it like an int. + + Eigen::Tensor a(3, 4); + int dim1 = a.dimension(1); + cout << "Dim 1: " << dim1; + => Dim 1: 4 + +### Index size() + +Returns the total number of elements in the tensor. This is the product of all +the tensor dimensions. The actual type of the ```size()``` result is +```::Index```, but you can always use it like an int. 
+ + Eigen::Tensor a(3, 4); + cout << "Size: " << a.size(); + => Size: 12 + + +### Getting Dimensions From An Operation + +A few operations provide ```dimensions()``` directly, +e.g. ```TensorReslicingOp```. Most operations defer calculating dimensions +until the operation is being evaluated. If you need access to the dimensions +of a deferred operation, you can wrap it in a TensorRef (see Assigning to a +TensorRef above), which provides ```dimensions()``` and ```dimension()``` as +above. + +TensorRef can also wrap the plain Tensor types, so this is a useful idiom in +templated contexts where the underlying object could be either a raw Tensor +or some deferred operation (e.g. a slice of a Tensor). In this case, the +template code can wrap the object in a TensorRef and reason about its +dimensionality while remaining agnostic to the underlying type. + + +## Constructors + +### Tensor + +Creates a tensor of the specified size. The number of arguments must be equal +to the rank of the tensor. The content of the tensor is not initialized. + + Eigen::Tensor a(3, 4); + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + +### TensorFixedSize + +Creates a tensor of the specified size. The number of arguments in the Sizes<> +template parameter determines the rank of the tensor. The content of the tensor +is not initialized. + + Eigen::TensorFixedSize<float, Sizes<3, 4>> a; + cout << "Rank: " << a.rank() << endl; + => Rank: 2 + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + +### TensorMap + +Creates a tensor mapping an existing array of data. The data must not be freed +until the TensorMap is discarded, and the size of the data must be large enough +to accommodate all of the coefficients of the tensor.
+ + float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + Eigen::TensorMap a(data, 3, 4); + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + cout << "a(1, 2): " << a(1, 2) << endl; + => a(1, 2): 7 + + +## Contents Initialization + +When a new Tensor or a new TensorFixedSize are created, memory is allocated to +hold all the tensor elements, but the memory is not initialized. Similarly, +when a new TensorMap is created on top of non-initialized memory its +contents are not initialized. + +You can use one of the methods below to initialize the tensor memory. These +have an immediate effect on the tensor and return the tensor itself as a +result. These are not tensor Operations which delay evaluation. + +### setConstant(const Scalar& val) + +Sets all elements of the tensor to the constant value ```val```. ```Scalar``` +is the type of data stored in the tensor. You can pass any value that is +convertible to that type. + +Returns the tensor itself in case you want to chain another call. + + a.setConstant(12.3f); + cout << "Constant: " << endl << a << endl << endl; + => + Constant: + 12.3 12.3 12.3 12.3 + 12.3 12.3 12.3 12.3 + 12.3 12.3 12.3 12.3 + +Note that ```setConstant()``` can be used on any tensor where the element type +has a copy constructor and an ```operator=()```: + + Eigen::Tensor a(2, 3); + a.setConstant("yolo"); + cout << "String tensor: " << endl << a << endl << endl; + => + String tensor: + yolo yolo yolo + yolo yolo yolo + + +### setZero() + +Fills the tensor with zeros. Equivalent to ```setConstant(Scalar(0))```. +Returns the tensor itself in case you want to chain another call. + + a.setZero(); + cout << "Zeros: " << endl << a << endl << endl; + => + Zeros: + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + + +### setValues({..initializer_list}) + +Fills the tensor with explicit values specified in a std::initializer_list.
+The type of the initializer list depends on the type and rank of the tensor. + +If the tensor has rank N, the initializer list must be nested N times. The +most deeply nested lists must contain P scalars of the Tensor type where P is +the size of the last dimension of the Tensor. + +For example, for a ```TensorFixedSize``` the initializer list must +contain 2 lists of 3 floats each. + +```setValues()``` returns the tensor itself in case you want to chain another +call. + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}}); + cout << "a" << endl << a << endl << endl; + => + a + 0 1 2 + 3 4 5 + +If a list is too short, the corresponding elements of the tensor will not be +changed. This is valid at each level of nesting. For example the following +code only sets the values of the first row of the tensor. + + Eigen::Tensor a(2, 3); + a.setConstant(1000); + a.setValues({{10, 20, 30}}); + cout << "a" << endl << a << endl << endl; + => + a + 10 20 30 + 1000 1000 1000 + +### setRandom() + +Fills the tensor with random values. Returns the tensor itself in case you +want to chain another call. + + a.setRandom(); + cout << "Random: " << endl << a << endl << endl; + => + Random: + 0.680375 0.59688 -0.329554 0.10794 + -0.211234 0.823295 0.536459 -0.0452059 + 0.566198 -0.604897 -0.444451 0.257742 + +You can customize ```setRandom()``` by providing your own random number +generator as a template argument: + + a.setRandom<MyRandomGenerator>(); + +Here, ```MyRandomGenerator``` must be a struct with the following member +functions, where Scalar and Index are the same as ```<Tensor-Type>::Scalar``` +and ```<Tensor-Type>::Index```. + +See ```struct UniformRandomGenerator``` in TensorFunctors.h for an example. + + // Custom number generator for use with setRandom(). + struct MyRandomGenerator { + // Default and copy constructors. Both are needed + MyRandomGenerator() { } + MyRandomGenerator(const MyRandomGenerator& ) { } + + // Return a random value to be used.
"element_location" is the + // location of the entry to set in the tensor, it can typically + // be ignored. + Scalar operator()(Eigen::DenseIndex element_location, + Eigen::DenseIndex /*unused*/ = 0) const { + return ; + } + + // Same as above but generates several numbers at a time. + typename internal::packet_traits::type packetOp( + Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const { + return ; + } + }; + +You can also use one of the 2 random number generators that are part of the +tensor library: +* UniformRandomGenerator +* NormalRandomGenerator + + +## Data Access + +The Tensor, TensorFixedSize, and TensorRef classes provide the following +accessors to access the tensor coefficients: + + const Scalar& operator()(const array& indices) + const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + Scalar& operator()(const array& indices) + Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + +The number of indices must be equal to the rank of the tensor. Moreover, these +accessors are not available on tensor expressions. In order to access the +values of a tensor expression, the expression must either be evaluated or +wrapped in a TensorRef. + + +### Scalar* data() and const Scalar* data() const + +Returns a pointer to the storage for the tensor. The pointer is const if the +tensor was const. This allows direct access to the data. The layout of the +data depends on the tensor layout: RowMajor or ColMajor. + +This access is usually only needed for special cases, for example when mixing +Eigen Tensor code with other libraries. + +Scalar is the type of data stored in the tensor. + + Eigen::Tensor a(3, 4); + float* a_data = a.data(); + a_data[0] = 123.45f; + cout << "a(0, 0): " << a(0, 0); + => a(0, 0): 123.45 + + +## Tensor Operations + +All the methods documented below return non evaluated tensor ```Operations```. +These can be chained: you can apply another Tensor Operation to the value +returned by the method. 
+ +The chain of Operation is evaluated lazily, typically when it is assigned to a +tensor. See "Controlling when Expression are Evaluated" for more details about +their evaluation. + +### constant(const Scalar& val) + +Returns a tensor of the same type and dimensions as the original tensor but +where all elements have the value ```val```. + +This is useful, for example, when you want to add or subtract a constant from a +tensor, or multiply every element of a tensor by a scalar. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = a + a.constant(2.0f); + Eigen::Tensor c = b * b.constant(0.2f); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + cout << "c" << endl << c << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + 3 3 3 + 3 3 3 + + c + 0.6 0.6 0.6 + 0.6 0.6 0.6 + +### random() + +Returns a tensor of the same type and dimensions as the current tensor +but where all elements have random values. + +This is for example useful to add random values to an existing tensor. +The generation of random values can be customized in the same manner +as for ```setRandom()```. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = a + a.random(); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + 1.68038 1.5662 1.82329 + 0.788766 1.59688 0.395103 + + +## Unary Element Wise Operations + +All these operations take a single input tensor as argument and return a tensor +of the same type and dimensions as the tensor to which they are applied. The +requested operations are applied to each element independently. + +### operator-() + +Returns a tensor of the same type and dimensions as the original tensor +containing the opposite values of the original tensor. 
+ + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = -a; + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + -1 -1 -1 + -1 -1 -1 + +### sqrt() + +Returns a tensor of the same type and dimensions as the original tensor +containing the square roots of the original tensor. + +### rsqrt() + +Returns a tensor of the same type and dimensions as the original tensor +containing the inverse square roots of the original tensor. + +### square() + +Returns a tensor of the same type and dimensions as the original tensor +containing the squares of the original tensor values. + +### inverse() + +Returns a tensor of the same type and dimensions as the original tensor +containing the inverse of the original tensor values. + +### exp() + +Returns a tensor of the same type and dimensions as the original tensor +containing the exponential of the original tensor. + +### log() + +Returns a tensor of the same type and dimensions as the original tensor +containing the natural logarithms of the original tensor. + +### abs() + +Returns a tensor of the same type and dimensions as the original tensor +containing the absolute values of the original tensor. + +### pow(Scalar exponent) + +Returns a tensor of the same type and dimensions as the original tensor +containing the coefficients of the original tensor to the power of the +exponent. + +The type of the exponent, Scalar, is always the same as the type of the +tensor coefficients. For example, only integer exponents can be used in +conjunction with tensors of integer values. + +You can use cast() to lift this restriction.
For example this computes +cubic roots of an int Tensor: + + Eigen::Tensor a(2, 3); + a.setValues({{0, 1, 8}, {27, 64, 125}}); + Eigen::Tensor b = a.cast().pow(1.0 / 3.0); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 0 1 8 + 27 64 125 + + b + 0 1 2 + 3 4 5 + +### operator * (Scalar scale) + +Multiplies all the coefficients of the input tensor by the provided scale. + +### cwiseMax(Scalar threshold) +TODO + +### cwiseMin(Scalar threshold) +TODO + +### unaryExpr(const CustomUnaryOp& func) +TODO + + +## Binary Element Wise Operations + +These operations take two input tensors as arguments. The 2 input tensors should +be of the same type and dimensions. The result is a tensor of the same +dimensions as the tensors to which they are applied, and unless otherwise +specified it is also of the same type. The requested operations are applied to +each pair of elements independently. + +### operator+(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise sums of the inputs. + +### operator-(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise differences of the inputs. + +### operator*(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise products of the inputs. + +### operator/(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise quotients of the inputs. + +This operator is not supported for integer types. + +### cwiseMax(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise maximums of the inputs. 
+ +### cwiseMin(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise minimums of the inputs. + +### Logical operators + +The following logical operators are supported as well: + +* operator&&(const OtherDerived& other) +* operator||(const OtherDerived& other) +* operator<(const OtherDerived& other) +* operator<=(const OtherDerived& other) +* operator>(const OtherDerived& other) +* operator>=(const OtherDerived& other) +* operator==(const OtherDerived& other) +* operator!=(const OtherDerived& other) + +They all return a tensor of boolean values. + + +## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor)) + +Selection is a coefficient-wise ternary operator that is the tensor equivalent +to the if-then-else operation. + + Tensor if = ...; + Tensor then = ...; + Tensor else = ...; + Tensor result = if.select(then, else); + +The 3 arguments must be of the same dimensions, which will also be the dimension +of the result. The 'if' tensor must be of type boolean, the 'then' and the +'else' tensor must be of the same type, which will also be the type of the +result. + +Each coefficient in the result is equal to the corresponding coefficient in the +'then' tensor if the corresponding value in the 'if' tensor is true. If not, the +resulting coefficient will come from the 'else' tensor. + + +## Contraction + +Tensor *contractions* are a generalization of the matrix product to the +multidimensional case.
+ + // Create 2 matrices using tensors of rank 2 + Eigen::Tensor<int, 2> a(2, 3); + a.setValues({{1, 2, 3}, {6, 5, 4}}); + Eigen::Tensor<int, 2> b(3, 2); + b.setValues({{1, 2}, {4, 5}, {5, 6}}); + + // Compute the traditional matrix product + array<IndexPair<int>, 1> product_dims = { IndexPair<int>(1, 0) }; + Eigen::Tensor<int, 2> AB = a.contract(b, product_dims); + + // Compute the product of the transpose of the matrices + array<IndexPair<int>, 1> transpose_product_dims = { IndexPair<int>(0, 1) }; + Eigen::Tensor<int, 2> AtBt = a.contract(b, transpose_product_dims); + + +## Reduction Operations + +A *Reduction* operation returns a tensor with fewer dimensions than the +original tensor. The values in the returned tensor are computed by applying a +*reduction operator* to slices of values from the original tensor. You specify +the dimensions along which the slices are made. + +The Eigen Tensor library provides a set of predefined reduction operators such +as ```maximum()``` and ```sum()``` and lets you define additional operators by +implementing a few methods from a reductor template. + +### Reduction Dimensions + +All reduction operations take a single parameter of type +```::Dimensions``` which can always be specified as an array of +ints. These are called the "reduction dimensions." The values are the indices +of the dimensions of the input tensor over which the reduction is done. The +parameter can have at most as many elements as the rank of the input tensor; +each element must be less than the tensor rank, as it indicates one of the +dimensions to reduce. + +Each dimension of the input tensor should occur at most once in the reduction +dimensions as the implementation does not remove duplicates. + +The order of the values in the reduction dimensions does not affect the +results, but the code may execute faster if you list the dimensions in +increasing order. + +Example: Reduction along one dimension.
+ + // Create a tensor of 2 dimensions + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {6, 5, 4}}); + // Reduce it along the second dimension (1)... + Eigen::array dims({1 /* dimension to reduce */}); + // ...using the "maximum" operator. + // The result is a tensor with one dimension. The size of + // that dimension is the same as the first (non-reduced) dimension of a. + Eigen::Tensor b = a.maximum(dims); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 6 5 4 + + b + 3 + 6 + +Example: Reduction along two dimensions. + + Eigen::Tensor a(2, 3, 4); + a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, + {7.0f, 6.0f, 5.0f, 4.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}, + {{12.0f, 13.0f, 14.0f, 15.0f}, + {19.0f, 18.0f, 17.0f, 16.0f}, + {20.0f, 21.0f, 22.0f, 23.0f}}}); + // The tensor a has 3 dimensions. We reduce along the + // first 2, resulting in a tensor with a single dimension + // of size 4 (the last dimension of a.) + // Note that we pass the array of reduction dimensions + // directly to the maximum() call. + Eigen::Tensor b = + a.maximum(Eigen::array({0, 1})); + cout << "b" << endl << b << endl << endl; + => + b + 20 + 21 + 22 + 23 + +#### Reduction along all dimensions + +As a special case, if you pass no parameter to a reduction operation the +original tensor is reduced along *all* its dimensions. The result is a +scalar, represented as a zero-dimension tensor. + + Eigen::Tensor a(2, 3, 4); + a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, + {7.0f, 6.0f, 5.0f, 4.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}, + {{12.0f, 13.0f, 14.0f, 15.0f}, + {19.0f, 18.0f, 17.0f, 16.0f}, + {20.0f, 21.0f, 22.0f, 23.0f}}}); + // Reduce along all dimensions using the sum() operator. + Eigen::Tensor b = a.sum(); + cout << "b" << endl << b << endl << endl; + => + b + 276 + + +### sum(const Dimensions& new_dims) +### sum() + +Reduce a tensor using the sum() operator. The resulting values +are the sum of the reduced values. 
+ +### mean(const Dimensions& new_dims) +### mean() + +Reduce a tensor using the mean() operator. The resulting values +are the mean of the reduced values. + +### maximum(const Dimensions& new_dims) +### maximum() + +Reduce a tensor using the maximum() operator. The resulting values are the +largest of the reduced values. + +### minimum(const Dimensions& new_dims) +### minimum() + +Reduce a tensor using the minimum() operator. The resulting values +are the smallest of the reduced values. + +### prod(const Dimensions& new_dims) +### prod() + +Reduce a tensor using the prod() operator. The resulting values +are the product of the reduced values. + +### all(const Dimensions& new_dims) +### all() +Reduce a tensor using the all() operator. Casts tensor to bool and then checks +whether all elements are true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + +### any(const Dimensions& new_dims) +### any() +Reduce a tensor using the any() operator. Casts tensor to bool and then checks +whether any element is true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + + +### reduce(const Dimensions& new_dims, const Reducer& reducer) + +Reduce a tensor using a user-defined reduction operator. See ```SumReducer``` +in TensorFunctors.h for information on how to implement a reduction operator. + + +## Scan Operations + +A *Scan* operation returns a tensor with the same dimensions as the original +tensor. The operation performs an inclusive scan along the specified +axis, which means it computes a running total along the axis for a given +reduction operation. +If the reduction operation corresponds to summation, then this computes the +prefix sum of the tensor along the given axis. 
+ +Example: + + // Create a tensor of 2 dimensions + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {4, 5, 6}}); + // Scan it along the second dimension (1) using summation + Eigen::Tensor b = a.cumsum(1); + // The result is a tensor with the same size as the input + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 4 5 6 + + b + 1 3 6 + 4 9 15 + +### cumsum(const Index& axis) + +Perform a scan by summing consecutive entries. + +### cumprod(const Index& axis) + +Perform a scan by multiplying consecutive entries. + + +## Convolutions + +### convolve(const Kernel& kernel, const Dimensions& dims) + +Returns a tensor that is the output of the convolution of the input tensor with the kernel, +along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor +which were part of the convolution will be reduced by the formula: +output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size). +The dimension sizes for dimensions that were not part of the convolution will remain the same. +Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the +convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is +for the last dimension). + + // Compute convolution along the second and third dimension. + Tensor input(3, 3, 7, 11); + Tensor kernel(2, 2); + Tensor output(3, 2, 6, 11); + input.setRandom(); + kernel.setRandom(); + + Eigen::array dims({1, 2}); // Specify second and third dimension for convolution.
+ output = input.convolve(kernel, dims); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 6; ++k) { + for (int l = 0; l < 11; ++l) { + const float result = output(i,j,k,l); + const float expected = input(i,j+0,k+0,l) * kernel(0,0) + + input(i,j+1,k+0,l) * kernel(1,0) + + input(i,j+0,k+1,l) * kernel(0,1) + + input(i,j+1,k+1,l) * kernel(1,1); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + +## Geometrical Operations + +These operations return a Tensor with different dimensions than the original +Tensor. They can be used to access slices of tensors, see them with different +dimensions, or pad tensors with additional data. + +### reshape(const Dimensions& new_dims) + +Returns a view of the input tensor that has been reshaped to the specified +new dimensions. The argument new_dims is an array of Index values. The +rank of the resulting tensor is equal to the number of elements in new_dims. + +The product of all the sizes in the new dimension array must be equal to +the number of elements in the input tensor. + + // Increase the rank of the input tensor by introducing a new dimension + // of size 1. + Tensor input(7, 11); + array three_dims{{7, 11, 1}}; + Tensor result = input.reshape(three_dims); + + // Decrease the rank of the input tensor by merging 2 dimensions; + array one_dim{{7 * 11}}; + Tensor result = input.reshape(one_dim); + +This operation does not move any data in the input tensor, so the resulting +contents of a reshaped Tensor depend on the data layout of the original Tensor. 
+ +For example this is what happens when you ```reshape()``` a 2D ColMajor tensor +to one dimension: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array one_dim({3 * 2}); + Eigen::Tensor b = a.reshape(one_dim); + cout << "b" << endl << b << endl; + => + b + 0 + 300 + 100 + 400 + 200 + 500 + +This is what happens when the 2D Tensor is RowMajor: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array one_dim({3 * 2}); + Eigen::Tensor b = a.reshape(one_dim); + cout << "b" << endl << b << endl; + => + b + 0 + 100 + 200 + 300 + 400 + 500 + +The reshape operation is an lvalue. In other words, it can be used on the left +side of the assignment operator. + +The previous example can be rewritten as follows: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array two_dim({2, 3}); + Eigen::Tensor b; + b.reshape(two_dim) = a; + cout << "b" << endl << b << endl; + => + b + 0 + 300 + 100 + 400 + 200 + 500 + +Note that "b" itself was not reshaped but that instead the assignment is done to +the reshape view of b. + + +### shuffle(const Shuffle& shuffle) + +Returns a copy of the input tensor whose dimensions have been +reordered according to the specified permutation. The argument shuffle +is an array of Index values. Its size is the rank of the input +tensor. It must contain a permutation of 0, 1, ..., rank - 1. The i-th +dimension of the output tensor equals to the size of the shuffle[i]-th +dimension of the input tensor. For example: + + // Shuffle all dimensions to the left by 1. + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output = input.shuffle({1, 2, 0}); + + eigen_assert(output.dimension(0) == 30); + eigen_assert(output.dimension(1) == 50); + eigen_assert(output.dimension(2) == 20); + +Indices into the output tensor are shuffled accordingly to formulate +indices into the input tensor.
For example, one can assert in the above +code snippet that: + + eigen_assert(output(3, 7, 11) == input(11, 3, 7)); + +In general, one can assert that + + eigen_assert(output(..., indices[shuffle[i]], ...) == + input(..., indices[i], ...)) + +The shuffle operation results in a lvalue, which means that it can be assigned +to. In other words, it can be used on the left side of the assignment operator. + +Let's rewrite the previous example to take advantage of this feature: + + // Shuffle all dimensions to the left by 1. + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output(30, 50, 20); + output.shuffle({2, 0, 1}) = input; + + +### stride(const Strides& strides) + +Returns a view of the input tensor that strides (skips stride-1 +elements) along each of the dimensions. The argument strides is an +array of Index values. The dimensions of the resulting tensor are +ceil(input_dimensions[i] / strides[i]). + +For example this is what happens when you ```stride()``` a 2D tensor: + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array strides({3, 2}); + Eigen::Tensor b = a.stride(strides); + cout << "b" << endl << b << endl; + => + b + 0 200 + 900 1100 + +It is possible to assign a tensor to a stride: + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output(40, 90, 200); + output.stride({2, 3, 4}) = input; + + +### slice(const StartIndices& offsets, const Sizes& extents) + +Returns a sub-tensor of the given tensor. For each dimension i, the slice is +made of the coefficients stored between offset[i] and offset[i] + extents[i] in +the input tensor. 
+ + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array offsets = {1, 0}; + Eigen::array extents = {2, 2}; + Eigen::Tensor slice = a.slice(offsets, extents); + cout << "a" << endl << a << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + cout << "slice" << endl << slice << endl; + => + slice + 300 400 + 600 700 + + +### chip(const Index offset, const Index dim) + +A chip is a special kind of slice. It is the subtensor at the given offset in +the dimension dim. The returned tensor has one fewer dimension than the input +tensor: the dimension dim is removed. + +For example, a matrix chip would be either a row or a column of the input +matrix. + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::Tensor row_3 = a.chip(2, 0); + Eigen::Tensor col_2 = a.chip(1, 1); + cout << "a" << endl << a << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + cout << "row_3" << endl << row_3 << endl; + => + row_3 + 600 700 800 + cout << "col_2" << endl << col_2 << endl; + => + col_2 + 100 400 700 1000 + +It is possible to assign values to a tensor chip since the chip operation is a +lvalue. For example: + + Eigen::Tensor a(3); + a.setValues({{100, 200, 300}}); + Eigen::Tensor b(2, 3); + b.setZero(); + b.chip(0, 0) = a; + cout << "a" << endl << a << endl; + => + a + 100 + 200 + 300 + cout << "b" << endl << b << endl; + => + b + 100 200 300 + 0 0 0 + + +### reverse(const ReverseDimensions& reverse) + +Returns a view of the input tensor that reverses the order of the coefficients +along a subset of the dimensions. The argument reverse is an array of boolean +values that indicates whether or not the order of the coefficients should be +reversed along each of the dimensions. This operation preserves the dimensions +of the input tensor. 
+ +For example this is what happens when you ```reverse()``` the first dimension +of a 2D tensor: + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array reverse({true, false}); + Eigen::Tensor b = a.reverse(reverse); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + b + 900 1000 1100 + 600 700 800 + 300 400 500 + 0 100 200 + + +### broadcast(const Broadcast& broadcast) + +Returns a view of the input tensor in which the input is replicated one to many +times. +The broadcast argument specifies how many copies of the input tensor need to be +made in each of the dimensions. + + Eigen::Tensor a(2, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}}); + Eigen::array bcast({3, 2}); + Eigen::Tensor b = a.broadcast(bcast); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + b + 0 100 200 0 100 200 + 300 400 500 300 400 500 + 0 100 200 0 100 200 + 300 400 500 300 400 500 + 0 100 200 0 100 200 + 300 400 500 300 400 500 + +### concatenate(const OtherDerived& other, Axis axis) + +TODO + +### pad(const PaddingDimensions& padding) + +Returns a view of the input tensor in which the input is padded with zeros. + + Eigen::Tensor a(2, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}}); + Eigen::array, 2> paddings; + paddings[0] = make_pair(0, 1); + paddings[1] = make_pair(2, 3); + Eigen::Tensor b = a.pad(paddings); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + b + 0 0 0 0 + 0 0 0 0 + 0 100 200 0 + 300 400 500 0 + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + + +### extract_patches(const PatchDims& patch_dims) + +Returns a tensor of coefficient patches extracted from the input tensor, where +each patch is of dimension specified by 'patch_dims'. 
The returned tensor has +one greater dimension than the input tensor, which is used to index each patch. +The patch index in the output tensor depends on the data layout of the input +tensor: the patch index is the last dimension in ColMajor layout, and the first +dimension in RowMajor layout. + +For example, given the following input tensor: + + Eigen::Tensor tensor(3,4); + tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f}, + {4.0f, 5.0f, 6.0f, 7.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}); + + cout << "tensor: " << endl << tensor << endl; +=> +tensor: + 0 1 2 3 + 4 5 6 7 + 8 9 10 11 + +Six 2x2 patches can be extracted and indexed using the following code: + + Eigen::Tensor patch; + Eigen::array patch_dims; + patch_dims[0] = 2; + patch_dims[1] = 2; + patch = tensor.extract_patches(patch_dims); + for (int k = 0; k < 6; ++k) { + cout << "patch index: " << k << endl; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + if (DataLayout == ColMajor) { + cout << patch(i, j, k) << " "; + } else { + cout << patch(k, i, j) << " "; + } + } + cout << endl; + } + } + +This code results in the following output when the data layout is ColMajor: + +patch index: 0 +0 1 +4 5 +patch index: 1 +4 5 +8 9 +patch index: 2 +1 2 +5 6 +patch index: 3 +5 6 +9 10 +patch index: 4 +2 3 +6 7 +patch index: 5 +6 7 +10 11 + +This code results in the following output when the data layout is RowMajor: +(NOTE: the set of patches is the same as in ColMajor, but are indexed differently).
+ +patch index: 0 +0 1 +4 5 +patch index: 1 +1 2 +5 6 +patch index: 2 +2 3 +6 7 +patch index: 3 +4 5 +8 9 +patch index: 4 +5 6 +9 10 +patch index: 5 +6 7 +10 11 + +### extract_image_patches(const Index patch_rows, const Index patch_cols, + const Index row_stride, const Index col_stride, + const PaddingType padding_type) + +Returns a tensor of coefficient image patches extracted from the input tensor, +which is expected to have dimensions ordered as follows (depending on the data +layout of the input tensor, and the number of additional dimensions 'N'): + +*) ColMajor +1st dimension: channels (of size d) +2nd dimension: rows (of size r) +3rd dimension: columns (of size c) +4th-Nth dimension: time (for video) or batch (for bulk processing). + +*) RowMajor (reverse order of ColMajor) +1st-Nth dimension: time (for video) or batch (for bulk processing). +N+1'th dimension: columns (of size c) +N+2'th dimension: rows (of size r) +N+3'th dimension: channels (of size d) + +The returned tensor has one greater dimension than the input tensor, which is +used to index each patch. The patch index in the output tensor depends on the +data layout of the input tensor: the patch index is the 4'th dimension in +ColMajor layout, and the 4'th from the last dimension in RowMajor layout. 
+ +For example, given the following input tensor with the following dimension +sizes: + *) depth: 2 + *) rows: 3 + *) columns: 5 + *) batch: 7 + + Tensor tensor(2,3,5,7); + Tensor tensor_row_major = tensor.swap_layout(); + +2x2 image patches can be extracted and indexed using the following code: + +*) 2D patch: ColMajor (patch indexed by second-to-last dimension) + Tensor twod_patch; + twod_patch = tensor.extract_image_patches<2, 2>(); + // twod_patch.dimension(0) == 2 + // twod_patch.dimension(1) == 2 + // twod_patch.dimension(2) == 2 + // twod_patch.dimension(3) == 3*5 + // twod_patch.dimension(4) == 7 + +*) 2D patch: RowMajor (patch indexed by the second dimension) + Tensor twod_patch_row_major; + twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>(); + // twod_patch_row_major.dimension(0) == 7 + // twod_patch_row_major.dimension(1) == 3*5 + // twod_patch_row_major.dimension(2) == 2 + // twod_patch_row_major.dimension(3) == 2 + // twod_patch_row_major.dimension(4) == 2 + +## Special Operations + +### cast() + +Returns a tensor of type T with the same dimensions as the original tensor. +The returned tensor contains the values of the original tensor converted to +type T. + + Eigen::Tensor a(2, 3); + Eigen::Tensor b = a.cast(); + +This can be useful for example if you need to do element-wise division of +Tensors of integers. This is not currently supported by the Tensor library +but you can easily cast the tensors to floats to do the division: + + Eigen::Tensor a(2, 3); + a.setValues({{0, 1, 2}, {3, 4, 5}}); + Eigen::Tensor b = + (a.cast() / a.constant(2).cast()).cast(); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 0 1 2 + 3 4 5 + + b + 0 0 1 + 1 2 2 + + +### eval() + +TODO + + +## Representation of scalar values + +Scalar values are often represented by tensors of size 1 and rank 1. It would be +more logical and user friendly to use tensors of rank 0 instead. 
For example +Tensor::maximum() currently returns a Tensor. Similarly, the inner +product of 2 1d tensors (through contractions) returns a 1d tensor. In the +future these operations might be updated to return 0d tensors instead. + +## Limitations + +* The number of tensor dimensions is currently limited to 250 when using a + compiler that supports cxx11. It is limited to only 5 for older compilers. +* The IndexList class requires a cxx11 compliant compiler. You can use an + array of indices instead if you don't have access to a modern compiler. +* On GPUs only floating point values are properly tested and optimized for. +* Complex and integer values are known to be broken on GPUs. If you try to use + them you'll most likely end up triggering a static assertion failure such as + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + + diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h new file mode 100644 index 000000000..1940a9692 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -0,0 +1,527 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_H + +namespace Eigen { + +/** \class Tensor + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor class. + * + * The %Tensor class is the work-horse for all \em dense tensors within Eigen. + * + * The %Tensor class encompasses only dynamic-size objects so far. + * + * The first two template parameters are required: + * \tparam Scalar_ \anchor tensor_tparam_scalar Numeric type, e.g. 
float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam NumIndices_ Number of indices (i.e. rank of the tensor) + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam Options_ \anchor tensor_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required + * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization. + * Support for such operations (i.e. adding two tensors etc.) is planned. + * + * You can access elements of tensors using normal subscripting: + * + * \code + * Eigen::Tensor t(10, 10, 10, 10); + * t(0, 1, 2, 3) = 42.0; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN. + * + * Some notes: + * + *
    + *
    Relation to other parts of Eigen:
    + *
    The midterm development goal for this class is to have a hierarchy similar to the one Eigen uses for matrices, so that + * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code + * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor + * class does not provide any of these features and is only available as a stand-alone class that just allows for + * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to + * change dramatically.
    + *
    + * + * \ref TopicStorageOrders + */ + +template +class Tensor : public TensorBase > +{ + public: + typedef Tensor Self; + typedef TensorBase > Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef Scalar_ Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign), + Layout = Options_ & RowMajor ? RowMajor : ColMajor, + CoordAccess = true, + RawAccess = true + }; + + static const int Options = Options_; + static const int NumIndices = NumIndices_; + typedef DSizes Dimensions; + + protected: + TensorStorage m_storage; + +#ifdef EIGEN_HAS_SFINAE + template + struct isOfNormalIndex{ + static const bool is_array = internal::is_base_of, CustomIndices>::value; + static const bool is_int = NumTraits::IsInteger; + static const bool value = is_array | is_int; + }; +#endif + + public: + // Metadata + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } + + // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // work, because that uses base().coeffRef() - and we don't yet + // implement a similar class hierarchy + inline Self& base() { return *this; } + inline const Self& base() const { return *this; } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC inline const Scalar& 
coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeff(array{{firstIndex, secondIndex, otherIndices...}}); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(array{{firstIndex, secondIndex, otherIndices...}}); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return this->operator()(array{{firstIndex, secondIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + return coeff(array(i0, i1)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + return coeff(array(i0, i1, i2)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + return coeff(array(i0, i1, i2, i3)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + return coeff(array(i0, i1, i2, i3, i4)); + } +#endif + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + return coeff(indices); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const + { + // The bracket operator is only for vectors, use the parenthesis operator instead. + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return operator()(array{{firstIndex, secondIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + return coeffRef(array(i0, i1)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + return coeffRef(array(i0, i1, i2)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + return coeffRef(array(i0, i1, i2, i3)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + return coeffRef(array(i0, i1, i2, i3, i4)); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + return coeffRef(indices); + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) + { + // The bracket operator is only for vectors, use the parenthesis operator instead + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor() + : m_storage() + { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const Self& other) + : 
m_storage(other.m_storage) + { + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) + : m_storage(firstDimension, otherDimensions...) + { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) + : m_storage(dim1, array(dim1)) + { + EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2) + : m_storage(dim1*dim2, array(dim1, dim2)) + { + EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3) + : m_storage(dim1*dim2*dim3, array(dim1, dim2, dim3)) + { + EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4) + : m_storage(dim1*dim2*dim3*dim4, array(dim1, dim2, dim3, dim4)) + { + EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) + : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) + { + EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#endif + + /** Normal Dimension */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array& dimensions) + : m_storage(internal::array_prod(dimensions), dimensions) + { + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + resize(TensorEvaluator(assign, 
DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + void resize(Index firstDimension, IndexTypes... otherDimensions) + { + // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + resize(array{{firstDimension, otherDimensions...}}); + } +#endif + + /** Normal Dimension */ + EIGEN_DEVICE_FUNC void resize(const array& dimensions) + { + int i; + Index size = Index(1); + for (i = 0; i < NumIndices; i++) { + internal::check_rows_cols_for_overflow::run(size, dimensions[i]); + size *= dimensions[i]; + } + #ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); + m_storage.resize(size, dimensions); + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #else + m_storage.resize(size, dimensions); + #endif + } + + // Why this overload, DSizes is derived from array ??? 
// + EIGEN_DEVICE_FUNC void resize(const DSizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = dimensions[i]; + } + resize(dims); + } + + EIGEN_DEVICE_FUNC + void resize() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Nothing to do: rank 0 tensors have fixed size + } + + /** Custom Dimension */ +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) + { + resize(internal::customIndices2Array(dimensions)); + } +#endif + +#ifndef EIGEN_EMULATE_CXX11_META_H + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#else + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#endif + + protected: + + bool checkIndexRange(const array& indices) const + { + using internal::array_apply_and_reduce; + using internal::array_zip_and_reduce; + using internal::greater_equal_zero_op; + using internal::logical_and_op; + using internal::lesser_op; + + return + // check whether the indices are all >= 0 + array_apply_and_reduce(indices) && + // check whether the indices fit in the dimensions + array_zip_and_reduce(indices, m_storage.dimensions()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array& indices) const + { + if (Options&RowMajor) { + return m_storage.dimensions().IndexOfRowMajor(indices); + } else { + return m_storage.dimensions().IndexOfColMajor(indices); + } + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h new file mode 100644 index 000000000..d06f40cd8 --- 
/dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -0,0 +1,299 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H +#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H + +namespace Eigen { +namespace internal { + +/** \class TensorIndexTuple + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor + Index Tuple class. + * + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Tuple Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorIndexTupleOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorIndexTupleOp type; +}; + +} // end namespace internal + +template +class TensorIndexTupleOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Tuple CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename 
XprType::Nested m_xpr; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorIndexTupleOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return CoeffReturnType(index, m_impl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + TensorEvaluator m_impl; +}; + +namespace internal { + +/** \class TensorTupleIndex + * \ingroup CXX11_Tensor_Module + * + * \brief Converts to Tensor > and reduces to Tensor. 
+ * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Index Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorTupleReducerOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorTupleReducerOp type; +}; + +} // end namespace internal + +template +class TensorTupleReducerOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Index CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, + const ReduceOp& reduce_op, + const int return_dim, + const Dims& reduce_dims) + : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const ReduceOp& reduce_op() const { return m_reduce_op; } + + EIGEN_DEVICE_FUNC + const Dims& reduce_dims() const { return m_reduce_dims; } + + EIGEN_DEVICE_FUNC + int return_dim() const { return m_return_dim; } + + protected: + typename XprType::Nested m_xpr; + const ReduceOp m_reduce_op; + const int m_return_dim; + const Dims m_reduce_dims; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorTupleReducerOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar 
Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename TensorIndexTupleOp::CoeffReturnType TupleType; + typedef typename TensorEvaluator >, Device>::Dimensions Dimensions; + typedef typename TensorEvaluator , Device>::Dimensions InputDimensions; + static const int NumDims = internal::array_size::value; + typedef array StrideDims; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator >, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_orig_impl(op.expression(), device), + m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), + m_return_dim(op.return_dim()) { + + gen_strides(m_orig_impl.dimensions(), m_strides); + if (Layout == static_cast(ColMajor)) { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; + } else { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; + } + m_stride_div = m_strides[m_return_dim]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + const TupleType v = m_impl.coeff(index); + return (m_return_dim < 0) ? 
v.first : (v.first % m_stride_mod) / m_stride_div; + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = 1.0 + + (m_return_dim < 0 ? 0.0 : (TensorOpCost::ModCost() + TensorOpCost::DivCost())); + return m_orig_impl.costPerCoeff(vectorized) + + m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost); + } + + private: + EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { + if (m_return_dim < 0) { + return; // Won't be using the strides. + } + eigen_assert(m_return_dim < NumDims && + "Asking to convert index to a dimension outside of the rank"); + + // Calculate m_stride_div and m_stride_mod, which are used to + // calculate the value of an index w.r.t. the m_return_dim. + if (Layout == static_cast(ColMajor)) { + strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + strides[i] = strides[i-1] * dims[i-1]; + } + } else { + strides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + strides[i] = strides[i+1] * dims[i+1]; + } + } + } + + protected: + TensorEvaluator, Device> m_orig_impl; + TensorEvaluator >, Device> m_impl; + const int m_return_dim; + StrideDims m_strides; + Index m_stride_mod; + Index m_stride_div; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h new file mode 100644 index 000000000..166be200c --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -0,0 +1,181 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H +#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H + +namespace Eigen { + +/** \class TensorAssign + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor assignment class. + * + * This class is represents the assignment of the values resulting from the evaluation of + * the rhs expression to the memory locations denoted by the lhs expression. + */ +namespace internal { +template +struct traits > +{ + typedef typename LhsXprType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const std::size_t NumDimensions = internal::traits::NumDimensions; + static const int Layout = internal::traits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorAssignOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorAssignOp type; +}; + +} // end namespace internal + + + +template +class TensorAssignOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename LhsXprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {} + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + typename internal::remove_all::type& + 
lhsExpression() const { return *((typename internal::remove_all::type*)&m_lhs_xpr); } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename internal::remove_all::type& m_lhs_xpr; + const typename internal::remove_all::type& m_rhs_xpr; +}; + + +template +struct TensorEvaluator, Device> +{ + typedef TensorAssignOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : + m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // The dimensions of the lhs and the rhs tensors should be equal to prevent + // overflows and ensure the result is fully initialized. + // TODO: use left impl instead if right impl dimensions are known at compile time. + return m_rightImpl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); + m_leftImpl.evalSubExprsIfNeeded(NULL); + // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non + // null value), attempt to evaluate the rhs expression in place. 
Returns true iff in place + // evaluation isn't supported and the caller still needs to manually assign the values generated + // by the rhs to the lhs. + return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { + m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { + const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + const int RhsLoadMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + m_leftImpl.template writePacket(i, m_rightImpl.template packet(i)); + } + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_leftImpl.coeff(index); + } + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const + { + return m_leftImpl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // We assume that evalPacket or evalScalar is called to perform the + // assignment and account for the cost of the write here, but reduce left + // cost by one load because we are using m_leftImpl.coeffRef. 
+ TensorOpCost left = m_leftImpl.costPerCoeff(vectorized); + return m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost( + numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)), + left.bytes_stored(), left.compute_cycles()) + + TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); + } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } + + private: + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; +}; + +} + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h new file mode 100644 index 000000000..7a45a5cf4 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -0,0 +1,1010 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H +#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H + +// clang-format off + +namespace Eigen { + +/** \class TensorBase + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor base class. + * + * This class is the common parent of the Tensor and TensorMap class, thus + * making it possible to use either class interchangably in expressions. 
+ */ + +template +class TensorBase +{ + public: + typedef internal::traits DerivedTraits; + typedef typename DerivedTraits::Scalar Scalar; + typedef typename DerivedTraits::Index Index; + typedef typename internal::remove_const::type CoeffReturnType; + static const int NumDimensions = DerivedTraits::NumDimensions; + + // Generic nullary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp + nullaryExpr(const CustomNullaryOp& func) const { + return TensorCwiseNullaryOp(derived(), func); + } + + // Coefficient-wise nullary operators + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> + constant(const Scalar& value) const { + return nullaryExpr(internal::scalar_constant_op(value)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> + random() const { + return nullaryExpr(internal::UniformRandomGenerator()); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp + random(const RandomGenerator& gen = RandomGenerator()) const { + return nullaryExpr(gen); + } + + // Tensor generation + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorGeneratorOp + generate(const Generator& generator) const { + return TensorGeneratorOp(derived(), generator); + } + + // Generic unary operation support. 
+ template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp + unaryExpr(const CustomUnaryOp& func) const { + return TensorCwiseUnaryOp(derived(), func); + } + + // Coefficient-wise unary operators + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator-() const { + return unaryExpr(internal::scalar_opposite_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sqrt() const { + return unaryExpr(internal::scalar_sqrt_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sign() const { + return unaryExpr(internal::scalar_sign_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + rsqrt() const { + return unaryExpr(internal::scalar_rsqrt_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + square() const { + return unaryExpr(internal::scalar_square_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + cube() const { + return unaryExpr(internal::scalar_cube_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + inverse() const { + return unaryExpr(internal::scalar_inverse_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + tanh() const { + return unaryExpr(internal::scalar_tanh_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + digamma() const { + return unaryExpr(internal::scalar_digamma_op()); + } + + // igamma(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igamma(const OtherDerived& other) const { + return 
binaryExpr(other.derived(), internal::scalar_igamma_op()); + } + + // igammac(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igammac(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igammac_op()); + } + + // zeta(x = this, q = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + zeta(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_zeta_op()); + } + + // polygamma(n = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + polygamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_polygamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sigmoid() const { + return unaryExpr(internal::scalar_sigmoid_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + exp() const { + return unaryExpr(internal::scalar_exp_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log() const { + return unaryExpr(internal::scalar_log_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log1p() const { + return unaryExpr(internal::scalar_log1p_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + abs() const { + return unaryExpr(internal::scalar_abs_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const 
TensorCwiseUnaryOp, const Derived> + conjugate() const { + return unaryExpr(internal::scalar_conjugate_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + pow(Scalar exponent) const { + return unaryExpr(internal::bind2nd_op >(exponent)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + real() const { + return unaryExpr(internal::scalar_real_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + imag() const { + return unaryExpr(internal::scalar_imag_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar rhs) const { + EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + 
+ EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator% (Scalar rhs) const { + EIGEN_STATIC_ASSERT(NumTraits::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); + return unaryExpr(internal::scalar_mod_op(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + cwiseMax(Scalar threshold) const { + return cwiseMax(constant(threshold)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + cwiseMin(Scalar threshold) const { + return cwiseMin(constant(threshold)); + } + + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorConversionOp + cast() const { + return TensorConversionOp(derived()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + round() const { + return unaryExpr(internal::scalar_round_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + ceil() const { + return unaryExpr(internal::scalar_ceil_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + floor() const { + return unaryExpr(internal::scalar_floor_op()); + } + + // Generic binary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp + binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const { + return TensorCwiseBinaryOp(derived(), other, func); + } + + // Coefficient-wise binary operators. 
+ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator+(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_sum_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator-(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_difference_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator*(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_product_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator/(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_quotient_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + cwiseMax(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_max_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + cwiseMin(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_min_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator&&(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_and_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator||(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_or_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator^(const OtherDerived& other) const { + return binaryExpr(other.derived(), 
internal::scalar_boolean_xor_op()); + } + + // Comparisons and tests. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator<(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator<=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator>(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator>=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator==(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator!=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + // comparisons and tests for Scalars + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<(Scalar threshold) const { + return operator<(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<=(Scalar threshold) const { + return operator<=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const 
Derived, const TensorCwiseNullaryOp, const Derived> > + operator>(Scalar threshold) const { + return operator>(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>=(Scalar threshold) const { + return operator>=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator==(Scalar threshold) const { + return operator==(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator!=(Scalar threshold) const { + return operator!=(constant(threshold)); + } + + // Checks + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isnan)() const { + return unaryExpr(internal::scalar_isnan_op()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isinf)() const { + return unaryExpr(internal::scalar_isinf_op()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isfinite)() const { + return unaryExpr(internal::scalar_isfinite_op()); + } + + // Coefficient-wise ternary operators. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSelectOp + select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const { + return TensorSelectOp(derived(), thenTensor.derived(), elseTensor.derived()); + } + + // Contractions. + typedef Eigen::IndexPair DimensionPair; + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorContractionOp + contract(const OtherDerived& other, const Dimensions& dims) const { + return TensorContractionOp(derived(), other.derived(), dims); + } + + // Convolutions. 
+ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConvolutionOp + convolve(const KernelDerived& kernel, const Dimensions& dims) const { + return TensorConvolutionOp(derived(), kernel.derived(), dims); + } + + // Fourier transforms + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorFFTOp + fft(const FFT& fft) const { + return TensorFFTOp(derived(), fft); + } + + // Scan. + typedef TensorScanOp, const Derived> TensorScanSumOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanSumOp + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); + } + + typedef TensorScanOp, const Derived> TensorScanProdOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanProdOp + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanOp + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); + } + + // Reductions. 
+ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + sum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::SumReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + sum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::SumReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + mean(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MeanReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + mean() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MeanReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + prod(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::ProdReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + prod() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::ProdReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + maximum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MaxReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + maximum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MaxReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + minimum(const Dims& dims) const { + return 
TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MinReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + minimum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + all(const Dims& dims) const { + return cast().reduce(dims, internal::AndReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + all() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::AndReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + any(const Dims& dims) const { + return cast().reduce(dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + any() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax() const { + array in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin() const { + array in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax(const int return_dim) const 
{ + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), return_dim, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin(const int return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), return_dim, in_dims); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp + reduce(const Dims& dims, const Reducer& reducer) const { + return TensorReductionOp(derived(), dims, reducer); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorBroadcastingOp + broadcast(const Broadcast& broadcast) const { + return TensorBroadcastingOp(derived(), broadcast); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, Axis axis) const { + return TensorConcatenationOp(derived(), other.derived(), axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPatchOp + extract_patches(const PatchDims& patch_dims) const { + return TensorPatchOp(derived(), patch_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorImagePatchOp + extract_image_patches(const Index patch_rows = 1, const Index patch_cols = 1, + const Index row_stride = 1, const Index col_stride = 1, + const Index in_row_stride = 1, const Index in_col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { + return TensorImagePatchOp(derived(), patch_rows, patch_cols, row_stride, col_stride, + in_row_stride, in_col_stride, 1, 1, padding_type, padding_value); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorImagePatchOp + extract_image_patches(const 
Index patch_rows, const Index patch_cols, + const Index row_stride, const Index col_stride, + const Index in_row_stride, const Index in_col_stride, + const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top, const Index padding_bottom, + const Index padding_left,const Index padding_right, + const Scalar padding_value) const { + return TensorImagePatchOp(derived(), patch_rows, patch_cols, row_stride, col_stride, + in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride, + padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride, const Index row_stride, const Index col_stride, + const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top_z, const Index padding_bottom_z, + const Index padding_top, const Index padding_bottom, + const Index padding_left, const Index padding_right, const Scalar padding_value = Scalar(0)) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + + // Morphing 
operators. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorLayoutSwapOp + swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPaddingOp + pad(const PaddingDimensions& padding) const { + return TensorPaddingOp(derived(), padding, internal::scalar_cast_op()(0)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPaddingOp + pad(const PaddingDimensions& padding, const Scalar padding_value) const { + return TensorPaddingOp(derived(), padding, padding_value); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shuffle) const { + return TensorShufflingOp(derived(), shuffle); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const 
Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorInflationOp + inflate(const Strides& strides) const { + return TensorInflationOp(derived(), strides); + } + + // Returns a tensor containing index/value tuples + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorIndexTupleOp + index_tuples() const { + return TensorIndexTupleOp(derived()); + } + + // Support for custom unary and binary operations + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomUnaryOp customOp(const CustomUnaryFunc& op) const { + return TensorCustomUnaryOp(derived(), op); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomBinaryOp customOp(const OtherDerived& other, const CustomBinaryFunc& op) const { + return TensorCustomBinaryOp(derived(), other, op); + } + + // Force the evaluation of the expression. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorForcedEvalOp eval() const { + return TensorForcedEvalOp(derived()); + } + + protected: + template friend class Tensor; + template friend class TensorFixedSize; + template friend class TensorBase; + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } +}; + +template::value> +class TensorBase : public TensorBase { + public: + typedef internal::traits DerivedTraits; + typedef typename DerivedTraits::Scalar Scalar; + typedef typename DerivedTraits::Index Index; + typedef Scalar CoeffReturnType; + static const int NumDimensions = DerivedTraits::NumDimensions; + + template friend class Tensor; + template friend class TensorFixedSize; + template friend class TensorBase; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setZero() { + return setConstant(Scalar(0)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) { + return derived() = this->constant(val); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setRandom() { + return 
derived() = this->random(); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setRandom() { + return derived() = this->template random(); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setValues( + const typename internal::Initializer::InitList& vals) { + TensorEvaluator eval(derived(), DefaultDevice()); + internal::initialize_tensor(eval, vals); + return derived(); + } +#endif // EIGEN_HAS_VARIADIC_TEMPLATES + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const OtherDerived& other) { + return derived() = derived() + other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const OtherDerived& other) { + return derived() = derived() - other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const OtherDerived& other) { + return derived() = derived() * other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const OtherDerived& other) { + return derived() = derived() / other.derived(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorLayoutSwapOp + swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorLayoutSwapOp + swap_layout() { + return TensorLayoutSwapOp(derived()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) const { + return TensorConcatenationOp(derived(), other, axis); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) { + return TensorConcatenationOp(derived(), other, axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + 
TensorReshapingOp + reshape(const NewDimensions& newDimensions) { + return TensorReshapingOp(derived(), newDimensions); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) { + return TensorSlicingOp(derived(), startIndices, sizes); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset) { + return TensorChippingOp(derived(), offset, DimId); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset, const Index dim) { + return TensorChippingOp(derived(), offset, dim); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReverseOp + reverse(const ReverseDimensions& 
rev) { + return TensorReverseOp(derived(), rev); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shuffle) const { + return TensorShufflingOp(derived(), shuffle); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorShufflingOp + shuffle(const Shuffle& shuffle) { + return TensorShufflingOp(derived(), shuffle); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingOp + stride(const Strides& strides) { + return TensorStridingOp(derived(), strides); + } + + // Select the device on which to evaluate the expression. + template + TensorDevice device(const DeviceType& device) { + return TensorDevice(device, derived()); + } + + protected: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h new file mode 100644 index 000000000..4cfe300eb --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -0,0 +1,392 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H +#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H + +namespace Eigen { + +/** \class TensorBroadcasting + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor broadcasting class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorBroadcastingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorBroadcastingOp type; +}; + +template +struct is_input_scalar { + static const bool value = false; +}; +template <> +struct is_input_scalar > { + static const bool value = true; +}; +#ifndef EIGEN_EMULATE_CXX11_META_H +template +struct is_input_scalar > { + static const bool value = (Sizes::total_size == 1); +}; +#endif + +} // end namespace internal + + + +template +class TensorBroadcastingOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast) + : m_xpr(expr), m_broadcast(broadcast) {} + + EIGEN_DEVICE_FUNC + const Broadcast& broadcast() const { return m_broadcast; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; 
} + + protected: + typename XprType::Nested m_xpr; + const Broadcast m_broadcast; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorBroadcastingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = true, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_broadcast(op.broadcast()),m_impl(op.expression(), device) + { + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar + // and store the result in a scalar. Instead one should reshape the scalar into a a N-D + // tensor with N >= 1 of 1 element first and then broadcast. 
+ EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + const InputDimensions& input_dims = m_impl.dimensions(); + const Broadcast& broadcast = op.broadcast(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i] * broadcast[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + m_inputStrides[NumDims-1] = 1; + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const + { + if (internal::is_input_scalar::type>::value) { + return m_impl.coeff(0); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + return coeffColMajor(index); + } else { + return coeffRowMajor(index); + } + } + + // TODO: attempt to speed this up. 
The integer divisions and modulo are slow + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const + { + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index < m_impl.dimensions()[0]); + inputIndex += index; + } else { + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[0]); + } + } + return m_impl.coeff(inputIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const + { + Index inputIndex = 0; + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + inputIndex += index; + } else { + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[NumDims-1]); + } + } + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const + { + 
if (internal::is_input_scalar::type>::value) { + return internal::pset1(m_impl.coeff(0)); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + return packetColMajor(index); + } else { + return packetRowMajor(index); + } + } + + // Ignore the LoadMode and always use unaligned loads since we can't guarantee + // the alignment at compile time. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index < m_impl.dimensions()[0]); + innermostLoc = index; + } else { + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[0]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. 
+ if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndex); + for (int i = 1; i < PacketSize; ++i) { + values[i] = coeffColMajor(originalIndex+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + innermostLoc = index; + } else { + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[NumDims-1]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. 
+ if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndex); + for (int i = 1; i < PacketSize; ++i) { + values[i] = coeffRowMajor(originalIndex+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + double compute_cost = TensorOpCost::AddCost(); + if (NumDims > 0) { + for (int i = NumDims - 1; i > 0; --i) { + compute_cost += TensorOpCost::DivCost(); + if (internal::index_statically_eq(i, 1)) { + compute_cost += + TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } else { + if (!internal::index_statically_eq(i, 1)) { + compute_cost += TensorOpCost::MulCost() + + TensorOpCost::ModCost() + + TensorOpCost::AddCost(); + } + } + compute_cost += + TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } + } + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + Broadcast functor() const { return m_broadcast; } + + protected: + const Broadcast m_broadcast; + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h new file mode 100644 index 000000000..1ba7ef170 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -0,0 +1,384 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H +#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H + +namespace Eigen { + +/** \class TensorKChippingReshaping + * \ingroup CXX11_Tensor_Module + * + * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor. + * + * + */ + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorChippingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorChippingOp type; +}; + +template +struct DimensionId +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { + eigen_assert(dim == DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return DimId; + } +}; +template <> +struct DimensionId +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { + eigen_assert(dim >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return actual_dim; + } + private: + const DenseIndex actual_dim; +}; + + +} // end namespace internal + + + +template +class TensorChippingOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef 
typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) + : m_xpr(expr), m_offset(offset), m_dim(dim) { + } + + EIGEN_DEVICE_FUNC + const Index offset() const { return m_offset; } + EIGEN_DEVICE_FUNC + const Index dim() const { return m_dim.actualDim(); } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const Index m_offset; + const internal::DimensionId m_dim; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorChippingOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims-1; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets. 
+ IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) + { + EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(NumInputDims > m_dim.actualDim()); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); + + int j = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (i != m_dim.actualDim()) { + m_dimensions[j] = input_dims[i]; + ++j; + } + } + + m_stride = 1; + m_inputStride = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < m_dim.actualDim(); ++i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } else { + for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } + m_inputStride *= input_dims[m_dim.actualDim()]; + m_inputOffset = m_stride * op.offset(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == 0) || + 
(static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(m_stride == 1); + Index inputIndex = index * m_inputStride + m_inputOffset; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = m_impl.coeff(inputIndex); + inputIndex += m_inputStride; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } else if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumInputDims - 1) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(m_stride > index); + return m_impl.template packet(index + m_inputOffset); + } else { + const Index idx = index / m_stride; + const Index rem = index - idx * m_stride; + if (rem + PacketSize <= m_stride) { + Index inputIndex = idx * m_inputStride + m_inputOffset + rem; + return m_impl.template packet(inputIndex); + } else { + // Cross the stride boundary. Fallback to slow path. 
+ EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index); + ++index; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + double cost = 0; + if ((static_cast(Layout) == static_cast(ColMajor) && + m_dim.actualDim() == 0) || + (static_cast(Layout) == static_cast(RowMajor) && + m_dim.actualDim() == NumInputDims - 1)) { + cost += TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } else if ((static_cast(Layout) == static_cast(ColMajor) && + m_dim.actualDim() == NumInputDims - 1) || + (static_cast(Layout) == static_cast(RowMajor) && + m_dim.actualDim() == 0)) { + cost += TensorOpCost::AddCost(); + } else { + cost += 3 * TensorOpCost::MulCost() + TensorOpCost::DivCost() + + 3 * TensorOpCost::AddCost(); + } + + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { + CoeffReturnType* result = const_cast(m_impl.data()); + if (((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumDims) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) && + result) { + return result + m_inputOffset; + } else { + return NULL; + } + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex; + if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == 0) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. 
+ eigen_assert(m_stride == 1); + inputIndex = index * m_inputStride + m_inputOffset; + } else if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumInputDims-1) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(m_stride > index); + inputIndex = index + m_inputOffset; + } else { + const Index idx = index / m_stride; + inputIndex = idx * m_inputStride + m_inputOffset; + index -= idx * m_stride; + inputIndex += index; + } + return inputIndex; + } + + Dimensions m_dimensions; + Index m_stride; + Index m_inputOffset; + Index m_inputStride; + TensorEvaluator m_impl; + const internal::DimensionId m_dim; + const Device& m_device; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorChippingOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims-1; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + + if ((static_cast(this->Layout) == static_cast(ColMajor) 
&& this->m_dim.actualDim() == 0) || + (static_cast(this->Layout) == static_cast(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(this->m_stride == 1); + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + Index inputIndex = index * this->m_inputStride + this->m_inputOffset; + for (int i = 0; i < PacketSize; ++i) { + this->m_impl.coeffRef(inputIndex) = values[i]; + inputIndex += this->m_inputStride; + } + } else if ((static_cast(this->Layout) == static_cast(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) || + (static_cast(this->Layout) == static_cast(RowMajor) && this->m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(this->m_stride > index); + this->m_impl.template writePacket(index + this->m_inputOffset, x); + } else { + const Index idx = index / this->m_stride; + const Index rem = index - idx * this->m_stride; + if (rem + PacketSize <= this->m_stride) { + const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; + this->m_impl.template writePacket(inputIndex, x); + } else { + // Cross stride boundary. Fallback to slow path. + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + for (int i = 0; i < PacketSize; ++i) { + this->coeffRef(index) = values[i]; + ++index; + } + } + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h new file mode 100644 index 000000000..59bf90d93 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -0,0 +1,361 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H + +namespace Eigen { + +/** \class TensorConcatenationOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor concatenation class. + * + * + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename promote_storage_type::ret Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + enum { Flags = 0 }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConcatenationOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConcatenationOp type; +}; + +} // end namespace internal + + +template +class TensorConcatenationOp : public TensorBase, WriteAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::nested::type Nested; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) + : 
m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Axis m_axis; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorConcatenationOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + static const int RightNumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); 
+ EIGEN_STATIC_ASSERT((NumDims == RightNumDims), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + eigen_assert(0 <= m_axis && m_axis < NumDims); + const Dimensions& lhs_dims = m_leftImpl.dimensions(); + const Dimensions& rhs_dims = m_rightImpl.dimensions(); + { + int i = 0; + for (; i < m_axis; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + eigen_assert(lhs_dims[i] > 0); // Now i == m_axis. + eigen_assert(rhs_dims[i] > 0); + m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; + for (++i; i < NumDims; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_leftStrides[0] = 1; + m_rightStrides[0] = 1; + m_outputStrides[0] = 1; + + for (int j = 1; j < NumDims; ++j) { + m_leftStrides[j] = m_leftStrides[j-1] * lhs_dims[j-1]; + m_rightStrides[j] = m_rightStrides[j-1] * rhs_dims[j-1]; + m_outputStrides[j] = m_outputStrides[j-1] * m_dimensions[j-1]; + } + } else { + m_leftStrides[NumDims - 1] = 1; + m_rightStrides[NumDims - 1] = 1; + m_outputStrides[NumDims - 1] = 1; + + for (int j = NumDims - 2; j >= 0; --j) { + m_leftStrides[j] = m_leftStrides[j+1] * lhs_dims[j+1]; + m_rightStrides[j] = m_rightStrides[j+1] * rhs_dims[j+1]; + m_outputStrides[j] = m_outputStrides[j+1] * m_dimensions[j+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) + { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. + // See CL/76180724 comments for more ideas. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Collect dimension-wise indices (subs). + array subs; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[0] = index; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[NumDims - 1] = index; + } + + const Dimensions& left_dims = m_leftImpl.dimensions(); + if (subs[m_axis] < left_dims[m_axis]) { + Index left_index; + if (static_cast(Layout) == static_cast(ColMajor)) { + left_index = subs[0]; + for (int i = 1; i < NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + } else { + left_index = subs[NumDims - 1]; + for (int i = NumDims - 2; i >= 0; --i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + } + return m_leftImpl.coeff(left_index); + } else { + subs[m_axis] -= left_dims[m_axis]; + const Dimensions& right_dims = m_rightImpl.dimensions(); + Index right_index; + if (static_cast(Layout) == static_cast(ColMajor)) { + right_index = subs[0]; + for (int i = 1; i < NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + } else { + right_index = subs[NumDims - 1]; + for (int i = NumDims - 2; i >= 0; --i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + } + return m_rightImpl.coeff(right_index); + } + } + + // TODO(phli): Add a 
real vectorization. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost() + + TensorOpCost::ModCost()); + const double lhs_size = m_leftImpl.dimensions().TotalSize(); + const double rhs_size = m_rightImpl.dimensions().TotalSize(); + return (lhs_size / (lhs_size + rhs_size)) * + m_leftImpl.costPerCoeff(vectorized) + + (rhs_size / (lhs_size + rhs_size)) * + m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_leftStrides; + array m_rightStrides; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; + const Axis m_axis; +}; + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorConcatenationOp XprType; + typedef typename Base::Dimensions Dimensions; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device) + : Base(op, device) + { + EIGEN_STATIC_ASSERT((static_cast(Layout) == static_cast(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + 
typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + // Collect dimension-wise indices (subs). + array subs; + for (int i = Base::NumDims - 1; i > 0; --i) { + subs[i] = index / this->m_outputStrides[i]; + index -= subs[i] * this->m_outputStrides[i]; + } + subs[0] = index; + + const Dimensions& left_dims = this->m_leftImpl.dimensions(); + if (subs[this->m_axis] < left_dims[this->m_axis]) { + Index left_index = subs[0]; + for (int i = 1; i < Base::NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i]; + } + return this->m_leftImpl.coeffRef(left_index); + } else { + subs[this->m_axis] -= left_dims[this->m_axis]; + const Dimensions& right_dims = this->m_rightImpl.dimensions(); + Index right_index = subs[0]; + for (int i = 1; i < Base::NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i]; + } + return this->m_rightImpl.coeffRef(right_index); + } + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); + + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + internal::pstore(values, x); + for (int i = 0; i < packetSize; ++i) { + coeffRef(index+i) = values[i]; + } + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h new file mode 100644 index 000000000..20b29e5fd --- /dev/null +++ 
b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -0,0 +1,628 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H + +namespace Eigen { + +/** \class TensorContraction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor contraction class. + * + * + */ +namespace internal { + +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename gebp_traits::type, + typename remove_const::type>::ResScalar Scalar; + + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + + // From NumDims below. + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; + static const int Layout = traits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorContractionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorContractionOp type; +}; + +template +struct traits, Device_> > { + typedef Indices_ Indices; + typedef LeftArgType_ LeftArgType; + typedef RightArgType_ RightArgType; + typedef Device_ Device; + + // From NumDims below. 
+ static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; +}; + +} // end namespace internal + +template +class TensorContractionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename internal::gebp_traits::ResScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp( + const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims) {} + + EIGEN_DEVICE_FUNC + const Indices& indices() const { return m_indices; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Indices m_indices; +}; + + +template +struct TensorContractionEvaluatorBase +{ + typedef typename internal::traits::Indices Indices; + typedef typename internal::traits::LeftArgType LeftArgType; + typedef typename internal::traits::RightArgType RightArgType; + typedef typename internal::traits::Device Device; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + IsAligned = true, + PacketAccess = (internal::unpacket_traits::size > 1), + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = true + }; + + // Most of the code is assuming that both input tensors are 
ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + typedef DSizes Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorContractionEvaluatorBase(const XprType& op, const Device& device) + : m_leftImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), + op.lhsExpression(), op.rhsExpression()), device), + m_rightImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), + op.rhsExpression(), op.lhsExpression()), device), + m_device(device), + m_result(NULL) { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == + static_cast(TensorEvaluator::Layout)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + + DSizes eval_left_dims; + DSizes eval_right_dims; + array, ContractDims> eval_op_indices; + if (static_cast(Layout) == static_cast(ColMajor)) { + // For ColMajor, we keep using the existing dimensions + for (int i = 0; i < LDims; i++) { + eval_left_dims[i] = m_leftImpl.dimensions()[i]; + } + for (int i = 0; i < RDims; i++) { + eval_right_dims[i] = m_rightImpl.dimensions()[i]; + } + // We keep the pairs of contracting indices. 
+ for (int i = 0; i < ContractDims; i++) { + eval_op_indices[i].first = op.indices()[i].first; + eval_op_indices[i].second = op.indices()[i].second; + } + } else { + // For RowMajor, we need to reverse the existing dimensions + for (int i = 0; i < LDims; i++) { + eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1]; + } + for (int i = 0; i < RDims; i++) { + eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1]; + } + // We need to flip all the pairs of contracting indices as well as + // reversing the dimensions. + for (int i = 0; i < ContractDims; i++) { + eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; + eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; + } + } + + // Check for duplicate axes and make sure the first index in eval_op_indices + // is increasing. Using O(n^2) sorting is OK since ContractDims is small + for (int i = 0; i < ContractDims; i++) { + for (int j = i + 1; j < ContractDims; j++) { + eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first && + eval_op_indices[j].second != eval_op_indices[i].second && + "contraction axes should be unique"); + if (eval_op_indices[j].first < eval_op_indices[i].first) { + numext::swap(eval_op_indices[j], eval_op_indices[i]); + } + } + } + + array lhs_strides; + lhs_strides[0] = 1; + for (int i = 0; i < LDims-1; ++i) { + lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i]; + } + + array rhs_strides; + rhs_strides[0] = 1; + for (int i = 0; i < RDims-1; ++i) { + rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; + } + + if (m_i_strides.size() > 0) m_i_strides[0] = 1; + if (m_j_strides.size() > 0) m_j_strides[0] = 1; + if (m_k_strides.size() > 0) m_k_strides[0] = 1; + + m_i_size = 1; + m_j_size = 1; + m_k_size = 1; + + // To compute the dimension, we simply concatenate the non-contracting + // dimensions of the left and then the right tensor. 
Additionally, we also + // compute the strides corresponding to the left non-contracting + // dimensions and right non-contracting dimensions. + m_lhs_inner_dim_contiguous = true; + int dim_idx = 0; + unsigned int nocontract_idx = 0; + + for (int i = 0; i < LDims; i++) { + // find if we are contracting on index i of left tensor + bool contracting = false; + for (int j = 0; j < ContractDims; j++) { + if (eval_op_indices[j].first == i) { + contracting = true; + break; + } + } + if (!contracting) { + // add dimension size to output dimensions + m_dimensions[dim_idx] = eval_left_dims[i]; + m_left_nocontract_strides[nocontract_idx] = lhs_strides[i]; + if (dim_idx != i) { + m_lhs_inner_dim_contiguous = false; + } + if (nocontract_idx+1 < internal::array_size::value) { + m_i_strides[nocontract_idx+1] = + m_i_strides[nocontract_idx] * eval_left_dims[i]; + } else { + m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i]; + } + dim_idx++; + nocontract_idx++; + } + } + + nocontract_idx = 0; + for (int i = 0; i < RDims; i++) { + bool contracting = false; + // find if we are contracting on index i of right tensor + for (int j = 0; j < ContractDims; j++) { + if (eval_op_indices[j].second == i) { + contracting = true; + break; + } + } + if (!contracting) { + m_dimensions[dim_idx] = eval_right_dims[i]; + if (nocontract_idx+1 < internal::array_size::value) { + m_j_strides[nocontract_idx+1] = + m_j_strides[nocontract_idx] * eval_right_dims[i]; + } else { + m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i]; + } + m_right_nocontract_strides[nocontract_idx] = rhs_strides[i]; + dim_idx++; + nocontract_idx++; + } + } + + // Now compute the strides corresponding to the contracting dimensions. We + // assumed above that non-contracting axes are represented in the same order + // in the matrix as they are in the tensor. This is not the case for + // contracting axes. 
As the contracting axes must be of the same size in + // each tensor, we'll only look at the first tensor here. + m_rhs_inner_dim_contiguous = true; + m_rhs_inner_dim_reordered = false; + for (int i = 0; i < ContractDims; i++) { + Index left = eval_op_indices[i].first; + Index right = eval_op_indices[i].second; + + Index size = eval_left_dims[left]; + eigen_assert(size == eval_right_dims[right] && + "Contraction axes must be same size"); + + if (i+1 < static_cast(internal::array_size::value)) { + m_k_strides[i+1] = m_k_strides[i] * size; + } else { + m_k_size = m_k_strides[i] * size; + } + m_left_contracting_strides[i] = lhs_strides[left]; + m_right_contracting_strides[i] = rhs_strides[right]; + + if (i > 0 && right < eval_op_indices[i-1].second) { + m_rhs_inner_dim_reordered = true; + } + if (right != i) { + m_rhs_inner_dim_contiguous = false; + } + } + + // If the layout is RowMajor, we need to reverse the m_dimensions + if (static_cast(Layout) == static_cast(RowMajor)) { + for (int i = 0, j = NumDims - 1; i < j; i++, j--) { + numext::swap(m_dimensions[i], m_dimensions[j]); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { + if (this->m_lhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + 
static_cast(this)->template evalProduct(buffer); + } + } + } + else { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + } + } + + template + EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const { + const Index rows = m_i_size; + const Index cols = m_k_size; + + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; + const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned; + const int rhs_alignment = RightEvaluator::IsAligned ? 
Aligned : Unaligned; + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, + m_left_contracting_strides, m_k_strides); + RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides, + m_right_contracting_strides, m_k_strides); + + const Scalar alpha(1); + const Index resIncr(1); + + // zero out the result buffer (which must be of size at least rows * sizeof(Scalar) + m_device.memset(buffer, 0, rows * sizeof(Scalar)); + + internal::general_matrix_vector_product::run( + rows, cols, lhs, rhs, + buffer, resIncr, alpha); + } + + template + EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + // define mr, nr, and all of my data mapper types + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef typename internal::gebp_traits Traits; + + const Index nr = Traits::nr; + const Index mr = Traits::mr; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + // Declare GEBP packing and kernel structs + internal::gemm_pack_lhs pack_lhs; + internal::gemm_pack_rhs pack_rhs; + + internal::gebp_kernel gebp; + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, 
this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + // Sizes of the blocks to load in cache. See the Goto paper for details. + internal::TensorContractionBlocking blocking(k, m, n, 1); + const Index kc = blocking.kc(); + const Index mc = numext::mini(m, blocking.mc()); + const Index nc = numext::mini(n, blocking.nc()); + const Index sizeA = mc * kc; + const Index sizeB = kc * nc; + + LhsScalar* blockA = static_cast(this->m_device.allocate(sizeA * sizeof(LhsScalar))); + RhsScalar* blockB = static_cast(this->m_device.allocate(sizeB * sizeof(RhsScalar))); + + for(Index i2=0; i2m_device.deallocate(blockA); + this->m_device.deallocate(blockB); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; } + + protected: + // Prevent assignment + TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&); + Dimensions m_dimensions; + + contract_t m_k_strides; + contract_t m_left_contracting_strides; + contract_t m_right_contracting_strides; + + bool m_lhs_inner_dim_contiguous; + bool m_rhs_inner_dim_contiguous; + bool m_rhs_inner_dim_reordered; + + left_nocontract_t m_i_strides; + 
right_nocontract_t m_j_strides; + left_nocontract_t m_left_nocontract_strides; + right_nocontract_t m_right_nocontract_strides; + + Index m_i_size; + Index m_j_size; + Index m_k_size; + + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; + const Device& m_device; + Scalar* m_result; +}; + + +// evaluator for default device +template +struct TensorEvaluator, Device> : + public TensorContractionEvaluatorBase< + TensorEvaluator, Device> > { + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + // Could we use NumDimensions here? 
+ typedef DSizes Dimensions; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) { } + + template + EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { + if (this->m_j_size == 1) { + this->template evalGemv(buffer); + return; + } + + this->template evalGemm(buffer); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h new file mode 100644 index 000000000..5cf7b4f71 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -0,0 +1,56 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H + + +namespace Eigen { +namespace internal { + +enum { + ShardByRow = 0, + ShardByCol = 1 +}; + + +// Default Blocking Strategy +template +class TensorContractionBlocking { + public: + + typedef typename LhsMapper::Scalar LhsScalar; + typedef typename RhsMapper::Scalar RhsScalar; + + EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : + kc_(k), mc_(m), nc_(n) + { + if (ShardingType == ShardByCol) { + computeProductBlockingSizes(kc_, mc_, nc_, num_threads); + } + else { + computeProductBlockingSizes(kc_, nc_, mc_, num_threads); + } + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } + + private: + Index kc_; + Index mc_; + Index nc_; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h new file mode 100644 index 000000000..d65dbb40f --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -0,0 +1,1391 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014-2015 Benoit Steiner +// Copyright (C) 2015 Navdeep Jaitly +// Copyright (C) 2014 Eric Martin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + +namespace Eigen { + +template +__device__ EIGEN_STRONG_INLINE void +EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, Scalar* lhs_shmem, Scalar* rhs_shmem, + const Index m_size, const Index n_size, const Index k_size) { + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + // declare and initialize 64 registers for output 8x8 block + + // prefetch registers + Scalar lhs_pf0; + Scalar lhs_pf1; + Scalar lhs_pf2; + Scalar lhs_pf3; + Scalar lhs_pf4; + Scalar lhs_pf5; + Scalar lhs_pf6; + Scalar lhs_pf7; + + Scalar rhs_pf0; + Scalar rhs_pf1; + Scalar rhs_pf2; + Scalar rhs_pf3; + Scalar rhs_pf4; + Scalar rhs_pf5; + Scalar rhs_pf6; + Scalar rhs_pf7; + + // shared memory is formatted + // (contract idx in block, nocontract idx in block, block idx) + // where block idx is column major. This transposition limits the number of + // bank conflicts when reading the LHS. The core idea is that since the contracting + // index is shared by both sides, then the contracting index should be in threadIdx.x. + + // On the LHS, we pad each row inside of each block with an extra element. This makes + // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts + // on writes and very few 2-way conflicts on reads. There is an 8x8 grid of these blocks. + + // On the RHS we just add 8 padding elements to the end of each block. This gives no bank + // conflicts on writes and also none on reads. 
+ + // storage indices + const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; + const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; + + const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0; + const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1; + const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2; + const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3; + const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4; + const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5; + const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6; + const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7; + + const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0; + const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1; + const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2; + const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3; + const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4; + const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5; + const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6; + const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7; + + // in the loading code, the following variables are important: + // threadIdx.x: the vertical position in an 8x8 block + // threadIdx.y: the vertical index of the 8x8 block in the grid + // threadIdx.z: the horizontal position in an 8x8 block + // k: the horizontal index of the 8x8 block in the grid + // + // The k parameter is implicit (it was the loop counter for a loop that went + // from 0 to <8, but now that loop is unrolled in the below code. 
+ + const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; + const Index lhs_vert = base_m + load_idx_vert; + +#define prefetchIntoRegisters(base_k) \ + { \ + lhs_pf0 = conv(0); \ + lhs_pf1 = conv(0); \ + lhs_pf2 = conv(0); \ + lhs_pf3 = conv(0); \ + lhs_pf4 = conv(0); \ + lhs_pf5 = conv(0); \ + lhs_pf6 = conv(0); \ + lhs_pf7 = conv(0); \ + \ + rhs_pf0 = conv(0); \ + rhs_pf1 = conv(0); \ + rhs_pf2 = conv(0); \ + rhs_pf3 = conv(0); \ + rhs_pf4 = conv(0); \ + rhs_pf5 = conv(0); \ + rhs_pf6 = conv(0); \ + rhs_pf7 = conv(0); \ + \ + if (!needs_edge_check || lhs_vert < m_size) { \ + const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ + const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ + const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ + const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ + const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ + const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ + const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ + const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8; \ + \ + if (!needs_edge_check || lhs_horiz_7 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ + lhs_pf7 = lhs(lhs_vert, lhs_horiz_7); \ + } else if (lhs_horiz_6 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ + } else if (lhs_horiz_5 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ 
+ lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + } else if (lhs_horiz_4 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + } else if (lhs_horiz_3 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + } else if (lhs_horiz_2 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + } else if (lhs_horiz_1 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + } else if (lhs_horiz_0 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + } \ + } \ + \ + const Index rhs_vert = base_k + load_idx_vert; \ + if (!needs_edge_check || rhs_vert < k_size) { \ + const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \ + const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \ + const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \ + const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \ + const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \ + const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \ + const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \ + const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \ + \ + if (rhs_horiz_7 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ + rhs_pf7 = rhs(rhs_vert, rhs_horiz_7); \ + } else if (rhs_horiz_6 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); 
\ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ + } else if (rhs_horiz_5 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + } else if (rhs_horiz_4 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + } else if (rhs_horiz_3 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + } else if (rhs_horiz_2 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + } else if (rhs_horiz_1 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + } else if (rhs_horiz_0 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + } \ + } \ + } \ + +#define writeRegToShmem(_) \ + lhs_shmem[lhs_store_idx_0] = lhs_pf0; \ + rhs_shmem[rhs_store_idx_0] = rhs_pf0; \ + \ + lhs_shmem[lhs_store_idx_1] = lhs_pf1; \ + rhs_shmem[rhs_store_idx_1] = rhs_pf1; \ + \ + lhs_shmem[lhs_store_idx_2] = lhs_pf2; \ + rhs_shmem[rhs_store_idx_2] = rhs_pf2; \ + \ + lhs_shmem[lhs_store_idx_3] = lhs_pf3; \ + rhs_shmem[rhs_store_idx_3] = rhs_pf3; \ + \ + lhs_shmem[lhs_store_idx_4] = lhs_pf4; \ + rhs_shmem[rhs_store_idx_4] = rhs_pf4; \ + \ + lhs_shmem[lhs_store_idx_5] = lhs_pf5; \ + rhs_shmem[rhs_store_idx_5] = rhs_pf5; \ + \ + lhs_shmem[lhs_store_idx_6] = lhs_pf6; \ + rhs_shmem[rhs_store_idx_6] = rhs_pf6; \ + \ + 
lhs_shmem[lhs_store_idx_7] = lhs_pf7; \ + rhs_shmem[rhs_store_idx_7] = rhs_pf7; \ + + // declare and initialize result array +#define res(i, j) _res_##i##j +#define initResultRow(i) \ + Scalar res(i, 0) = conv(0); \ + Scalar res(i, 1) = conv(0); \ + Scalar res(i, 2) = conv(0); \ + Scalar res(i, 3) = conv(0); \ + Scalar res(i, 4) = conv(0); \ + Scalar res(i, 5) = conv(0); \ + Scalar res(i, 6) = conv(0); \ + Scalar res(i, 7) = conv(0); \ + + internal::scalar_cast_op conv; + initResultRow(0); + initResultRow(1); + initResultRow(2); + initResultRow(3); + initResultRow(4); + initResultRow(5); + initResultRow(6); + initResultRow(7); +#undef initResultRow + + for (Index base_k = 0; base_k < k_size; base_k += 64) { + // wait for previous iteration to finish with shmem. Despite common sense, + // the code is a bit faster with this here then at bottom of loop + __syncthreads(); + + prefetchIntoRegisters(base_k); + writeRegToShmem(); + + #undef prefetchIntoRegisters + #undef writeRegToShmem + + // wait for shared mem packing to be done before starting computation + __syncthreads(); + + // compute 8x8 matrix product by outer product. This involves packing one column + // of LHS and one row of RHS into registers (takes 16 registers). 
+ +#define lcol(i) _lcol##i + Scalar lcol(0); + Scalar lcol(1); + Scalar lcol(2); + Scalar lcol(3); + Scalar lcol(4); + Scalar lcol(5); + Scalar lcol(6); + Scalar lcol(7); + +#define rrow(j) _rrow##j + Scalar rrow(0); + Scalar rrow(1); + Scalar rrow(2); + Scalar rrow(3); + Scalar rrow(4); + Scalar rrow(5); + Scalar rrow(6); + Scalar rrow(7); + + // Now x corresponds to k, y to m, and z to n + const Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y]; + const Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z]; + +#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))] +#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))] + +#define loadData(i, j) \ + lcol(0) = lhs_element(0, j); \ + rrow(0) = rhs_element(i, 0); \ + lcol(1) = lhs_element(1, j); \ + rrow(1) = rhs_element(i, 1); \ + lcol(2) = lhs_element(2, j); \ + rrow(2) = rhs_element(i, 2); \ + lcol(3) = lhs_element(3, j); \ + rrow(3) = rhs_element(i, 3); \ + lcol(4) = lhs_element(4, j); \ + rrow(4) = rhs_element(i, 4); \ + lcol(5) = lhs_element(5, j); \ + rrow(5) = rhs_element(i, 5); \ + lcol(6) = lhs_element(6, j); \ + rrow(6) = rhs_element(i, 6); \ + lcol(7) = lhs_element(7, j); \ + rrow(7) = rhs_element(i, 7); \ + +#define computeCol(j) \ + res(0, j) += lcol(0) * rrow(j); \ + res(1, j) += lcol(1) * rrow(j); \ + res(2, j) += lcol(2) * rrow(j); \ + res(3, j) += lcol(3) * rrow(j); \ + res(4, j) += lcol(4) * rrow(j); \ + res(5, j) += lcol(5) * rrow(j); \ + res(6, j) += lcol(6) * rrow(j); \ + res(7, j) += lcol(7) * rrow(j); \ + +#define computePass(i) \ + loadData(i, i); \ + \ + computeCol(0); \ + computeCol(1); \ + computeCol(2); \ + computeCol(3); \ + computeCol(4); \ + computeCol(5); \ + computeCol(6); \ + computeCol(7); \ + + computePass(0); + computePass(1); + computePass(2); + computePass(3); + computePass(4); + computePass(5); + computePass(6); + computePass(7); + +#undef lcol +#undef rrow +#undef lhs_element +#undef rhs_element +#undef loadData +#undef computeCol +#undef 
computePass + } // end loop over k + + // we've now iterated over all of the large (ie width 64) k blocks and + // accumulated results in registers. At this point thread (x, y, z) contains + // the sum across all big k blocks of the product of little k block of index (x, y) + // with block of index (y, z). To compute the final output, we need to reduce + // the 8 threads over y by summation. +#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask) + +#define reduceRow(i, mask) \ + shuffleInc(i, 0, mask); \ + shuffleInc(i, 1, mask); \ + shuffleInc(i, 2, mask); \ + shuffleInc(i, 3, mask); \ + shuffleInc(i, 4, mask); \ + shuffleInc(i, 5, mask); \ + shuffleInc(i, 6, mask); \ + shuffleInc(i, 7, mask); \ + +#define reduceMatrix(mask) \ + reduceRow(0, mask); \ + reduceRow(1, mask); \ + reduceRow(2, mask); \ + reduceRow(3, mask); \ + reduceRow(4, mask); \ + reduceRow(5, mask); \ + reduceRow(6, mask); \ + reduceRow(7, mask); \ + + // actually perform the reduction, now each thread of index (_, y, z) + // contains the correct values in its registers that belong in the output + // block + reduceMatrix(1); + reduceMatrix(2); + reduceMatrix(4); + +#undef shuffleInc +#undef reduceRow +#undef reduceMatrix + + // now we need to copy the 64 values into main memory. We can't split work + // among threads because all variables are in registers. There's 2 ways + // to do this: + // (1) have 1 thread do 64 writes from registers into global memory + // (2) have 1 thread do 64 writes into shared memory, and then 8 threads + // each do 8 writes into global memory. We can just overwrite the shared + // memory from the problem we just solved. 
+ // (2) is slightly faster than (1) due to less branching and more ILP + + // TODO: won't yield much gain, but could just use currently unused shared mem + // and then we won't have to sync + // wait for shared mem to be out of use + __syncthreads(); + +#define writeResultShmem(i, j) \ + lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \ + +#define writeRow(i) \ + writeResultShmem(i, 0); \ + writeResultShmem(i, 1); \ + writeResultShmem(i, 2); \ + writeResultShmem(i, 3); \ + writeResultShmem(i, 4); \ + writeResultShmem(i, 5); \ + writeResultShmem(i, 6); \ + writeResultShmem(i, 7); \ + + if (threadIdx.x == 0) { + writeRow(0); + writeRow(1); + writeRow(2); + writeRow(3); + writeRow(4); + writeRow(5); + writeRow(6); + writeRow(7); + } +#undef writeResultShmem +#undef writeRow + + const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); + const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); + + if (threadIdx.x < max_i_write) { + if (max_j_write == 8) { + // TODO: can i trade bank conflicts for coalesced writes? 
+ Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0]; + Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1]; + Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2]; + Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3]; + Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4]; + Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5]; + Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6]; + Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7]; + + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7; + } else { +#pragma unroll 7 + for (int j = 0; j < max_j_write; j++) { + Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j]; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val; + } + } + } +#undef res +} + + +template +__global__ void +__launch_bounds__(512) +EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ Scalar lhs_shmem[72 * 64]; + __shared__ Scalar rhs_shmem[72 * 64]; + + const Index m_block_idx = blockIdx.x; 
+ const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + if (base_m + 63 < m_size && base_n + 63 < n_size) { + EigenContractionKernelInternal(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); + } else { + EigenContractionKernelInternal(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); + } +} + + +template +__device__ EIGEN_STRONG_INLINE void +EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, float2 lhs_shmem2[][16], + float2 rhs_shmem2[][8], const Index m_size, + const Index n_size, const Index k_size, + const Index base_m, const Index base_n) { + typedef float Scalar; + + // prefetch registers + float4 lhs_pf0, rhs_pf0; + + float4 results[4]; + for (int i=0; i < 4; i++) { + results[i].x = results[i].y = results[i].z = results[i].w = 0; + } + + +#define prefetch_lhs(reg, row, col) \ + if (!CHECK_LHS_BOUNDARY) { \ + if (col < k_size) { \ + reg =lhs.loadPacket(row, col); \ + } \ + } else { \ + if (col < k_size) { \ + if (row + 3 < m_size) { \ + reg =lhs.loadPacket(row, col); \ + } else if (row + 2 < m_size) { \ + reg.x =lhs(row + 0, col); \ + reg.y =lhs(row + 1, col); \ + reg.z =lhs(row + 2, col); \ + } else if (row + 1 < m_size) { \ + reg.x =lhs(row + 0, col); \ + reg.y =lhs(row + 1, col); \ + } else if (row < m_size) { \ + reg.x =lhs(row + 0, col); \ + } \ + } \ + } \ + + + Index lhs_vert = base_m+threadIdx.x*4; + + for (Index k = 0; k < k_size; k += 16) { + lhs_pf0 = internal::pset1(0); + rhs_pf0 = internal::pset1(0); + + Index lhs_horiz = threadIdx.y+k; + prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz) + + Index rhs_vert = k+(threadIdx.x%4)*4; + Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n; + + if (!CHECK_RHS_BOUNDARY) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY 
+ rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if (rhs_vert + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } else { + if (rhs_horiz0 < n_size) { + if ((rhs_vert + 3) < k_size) { + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + } else if ((rhs_vert + 2) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if ((rhs_vert + 1) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } + } + float x1, x2 ; + // the following can be a bitwise operation..... some day. + if((threadIdx.x%8) < 4) { + x1 = rhs_pf0.y; + x2 = rhs_pf0.w; + } else { + x1 = rhs_pf0.x; + x2 = rhs_pf0.z; + } + x1 = __shfl_xor(x1, 4); + x2 = __shfl_xor(x2, 4); + if((threadIdx.x%8) < 4) { + rhs_pf0.y = x1; + rhs_pf0.w = x2; + } else { + rhs_pf0.x = x1; + rhs_pf0.z = x2; + } + + // We have 64 features. + // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1. + // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3. + // ... + // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63 + // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1 + // ... + rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y); + rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w); + + // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // ... + // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. 
(60, 61) + // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) + // ... + + lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y); + lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w); + + +#define add_vals(fl1, fl2, fr1, fr2)\ + results[0].x += fl1.x * fr1.x;\ + results[0].y += fl1.y * fr1.x;\ + results[0].z += fl2.x * fr1.x;\ + results[0].w += fl2.y * fr1.x;\ +\ + results[1].x += fl1.x * fr1.y;\ + results[1].y += fl1.y * fr1.y;\ + results[1].z += fl2.x * fr1.y;\ + results[1].w += fl2.y * fr1.y;\ +\ + results[2].x += fl1.x * fr2.x;\ + results[2].y += fl1.y * fr2.x;\ + results[2].z += fl2.x * fr2.x;\ + results[2].w += fl2.y * fr2.x;\ +\ + results[3].x += fl1.x * fr2.y;\ + results[3].y += fl1.y * fr2.y;\ + results[3].z += fl2.x * fr2.y;\ + results[3].w += fl2.y * fr2.y;\ + + __syncthreads(); + + // Do the multiplies. + #pragma unroll + for (int koff = 0; koff < 16; koff ++) { + // 32 x threads. + float2 fl1 = lhs_shmem2[koff][threadIdx.x]; + float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x]; + + int start_feature = threadIdx.y * 4; + float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; + float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; + + add_vals(fl1, fl2, fr1, fr2) + } + __syncthreads(); + } + +#undef prefetch_lhs +#undef add_vals + + Index horiz_base = threadIdx.y*4+base_n; + if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (!CHECK_RHS_BOUNDARY) { + // CHECK LHS + if (lhs_vert + 3 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + 
i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (lhs_vert + 2 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + } + } else if (lhs_vert + 1 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + } + } else if (lhs_vert < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + } + } + } else if (!CHECK_LHS_BOUNDARY) { + // CHECK RHS + /* + int ncols_rem = fminf(n_size- horiz_base, 4); + for (int i = 0; i < ncols_rem; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + }*/ + for (int i = 0; i < 4; i++) { + if (horiz_base+i < n_size) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } else { + // CHECK both boundaries. 
+ for (int i = 0; i < 4; i++) { + if (horiz_base+i < n_size) { + if (lhs_vert < m_size) + output(lhs_vert, horiz_base + i) = results[i].x; + if (lhs_vert + 1 < m_size) + output(lhs_vert + 1, horiz_base + i) = results[i].y; + if (lhs_vert + 2 < m_size) + output(lhs_vert + 2, horiz_base + i) = results[i].z; + if (lhs_vert + 3 < m_size) + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } +} + + +template +__device__ EIGEN_STRONG_INLINE void +EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, float2 lhs_shmem2[][32], + float2 rhs_shmem2[][8], const Index m_size, + const Index n_size, const Index k_size, + const Index base_m, const Index base_n) { + typedef float Scalar; + + // prefetch registers + float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3; + float4 rhs_pf0, rhs_pf1; + + float4 results[8]; + for (int i=0; i < 8; i++) { + results[i].x = results[i].y = results[i].z = results[i].w = 0; + } + + + Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32; + for (Index k = 0; k < k_size; k += 32) { + lhs_pf0 = internal::pset1(0); + lhs_pf1 = internal::pset1(0); + lhs_pf2 = internal::pset1(0); + lhs_pf3 = internal::pset1(0); + + rhs_pf0 = internal::pset1(0); + rhs_pf1 = internal::pset1(0); + + if (!CHECK_LHS_BOUNDARY) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0 
=lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + } + } else { + // just CHECK_LHS_BOUNDARY + if (lhs_vert + 3 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + } + } else if (lhs_vert + 2 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); + lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + 
lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + } + } else if (lhs_vert + 1 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + } + } else if (lhs_vert < m_size) { + if ((threadIdx.y/4+k+24) < k_size) 
{ + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + } + } + } + __syncthreads(); + Index rhs_vert = k+threadIdx.x*4; + Index rhs_horiz0 = threadIdx.y*2+base_n; + Index rhs_horiz1 = threadIdx.y*2+1+base_n; + if (!CHECK_RHS_BOUNDARY) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); + } else if (rhs_vert + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + } + } else { + if (rhs_horiz1 < n_size) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = 
rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); + } else if (k+threadIdx.x*4 + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + } else if (k+threadIdx.x*4 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + } + } else if (rhs_horiz0 < n_size) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + } else if ((rhs_vert + 2) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if ((rhs_vert + 1) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } + } + __syncthreads(); + // Loaded. Do computation + // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1. + // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3. + // .. + // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63 + rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x); + // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1. + // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3. + // .. + rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y); + // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1. + // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3. + rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z); + // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1. + // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3. 
+ rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w); + + // LHS. + // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) + // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) + // ... + // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) + // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) + + +#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\ + results[0].x += a_feat1.x * f1.x;\ + results[1].x += a_feat1.x * f1.y;\ + results[2].x += a_feat1.x * f2.x;\ + results[3].x += a_feat1.x * f2.y;\ + results[4].x += a_feat1.x * f3.x;\ + results[5].x += a_feat1.x * f3.y;\ + results[6].x += a_feat1.x * f4.x;\ + results[7].x += a_feat1.x * f4.y;\ +\ + results[0].y += a_feat1.y * f1.x;\ + results[1].y += a_feat1.y * f1.y;\ + results[2].y += a_feat1.y * f2.x;\ + results[3].y += a_feat1.y * f2.y;\ + results[4].y += a_feat1.y * f3.x;\ + results[5].y += a_feat1.y * f3.y;\ + results[6].y += a_feat1.y * f4.x;\ + results[7].y += a_feat1.y * f4.y;\ +\ + results[0].z += a_feat2.x * f1.x;\ + results[1].z += a_feat2.x * f1.y;\ + results[2].z += a_feat2.x * f2.x;\ + results[3].z += a_feat2.x * f2.y;\ + results[4].z += a_feat2.x * f3.x;\ + results[5].z += a_feat2.x * f3.y;\ + results[6].z += a_feat2.x * f4.x;\ + results[7].z += a_feat2.x * f4.y;\ +\ + results[0].w += a_feat2.y * f1.x;\ + results[1].w += a_feat2.y * f1.y;\ + results[2].w += a_feat2.y * f2.x;\ + results[3].w += a_feat2.y * f2.y;\ + results[4].w += a_feat2.y * f3.x;\ + results[5].w += a_feat2.y * f3.y;\ + results[6].w += a_feat2.y * f4.x;\ + results[7].w += a_feat2.y * f4.y;\ + + lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y); + lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y); + lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = 
make_float2(lhs_pf2.x, lhs_pf2.y); + lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y); + + lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w); + lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w); + lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w); + lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w); + + __syncthreads(); + + // Do the multiplies. + #pragma unroll + for (int koff = 0; koff < 32; koff ++) { + float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8]; + float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8]; + + // first feature is at (threadIdx.y/4) * 8 last is at start + 8. + int start_feature = (threadIdx.y / 4) * 8; + + float2 br1 = rhs_shmem2[start_feature/2 + (koff % 4) * 32][koff/4]; + float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4]; + float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4]; + float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4]; + + add_vals(a3, a4, br1, br2, br3, br4) + } + __syncthreads(); + } // end loop over k + + + __syncthreads(); + Index horiz_base = (threadIdx.y/4)*8+base_n; + if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (!CHECK_RHS_BOUNDARY) { + if (lhs_vert + 3 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (lhs_vert 
+ 2 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + } + } else if (lhs_vert + 1 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + } + } else if (lhs_vert < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + } + } + } else if (!CHECK_LHS_BOUNDARY) { + // CHECK BOUNDARY_B + for (int i = 0; i < 8; i++) { + if (horiz_base + i < n_size) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } else { + // CHECK both boundaries. + for (int i = 0; i < 8; i++) { + if (horiz_base + i < n_size) { + if (lhs_vert < m_size) + output(lhs_vert, horiz_base + i) = results[i].x; + if (lhs_vert + 1 < m_size) + output(lhs_vert + 1, horiz_base + i) = results[i].y; + if (lhs_vert + 2 < m_size) + output(lhs_vert + 2, horiz_base + i) = results[i].z; + if (lhs_vert + 3 < m_size) + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } +} + + +template +__global__ void +__launch_bounds__(256) +EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ float2 lhs_shmem[64*32]; + __shared__ float2 rhs_shmem[128*8]; + + typedef float2 LHS_MEM[64][32]; + typedef float2 RHS_MEM[128][8]; + + typedef float2 LHS_MEM16x16[32][16]; + typedef float2 RHS_MEM16x16[64][8]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 128 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + bool check_rhs = (base_n + 63) >= n_size; + bool check_lhs128 = (base_m 
+ 127) >= m_size; + + if (!check_rhs) { + if (!check_lhs128) { + // >= 128 rows left + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } + } else { + if (!check_lhs128) { + // >= 128 rows left + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } + } +} + +template +__global__ void +__launch_bounds__(256) +EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ float2 lhs_shmem[32][16]; + __shared__ float2 rhs_shmem[64][8]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + if (base_m + 63 < m_size) { + if (base_n + 63 < n_size) { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } + } else { + if (base_n + 63 < n_size) { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } + } +} + + +template +struct TensorEvaluator, GpuDevice> : + public TensorContractionEvaluatorBase, GpuDevice> > { + + 
typedef GpuDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout, + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + // typedefs needed in evalTo + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + typedef typename LeftEvaluator::Dimensions LeftDimensions; + typedef typename RightEvaluator::Dimensions RightDimensions; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) {} + + // We need to redefine this method to make nvcc happy + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + this->m_leftImpl.evalSubExprsIfNeeded(NULL); + this->m_rightImpl.evalSubExprsIfNeeded(NULL); + if (data) { + evalTo(data); + return false; + } else { + this->m_result = static_cast(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar))); + evalTo(this->m_result); + return true; + } + } + + void evalTo(Scalar* buffer) const { + if (this->m_lhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + } + else { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + } + } + + template struct LaunchKernels { + static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { + const Index m_blocks = (m + 63) / 64; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(8, 8, 8); + LAUNCH_CUDA_KERNEL((EigenContractionKernel), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } + }; + + template struct LaunchKernels { + static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { + if (m < 768 || n < 768) { + const Index m_blocks = (m + 63) / 64; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(16, 16, 1); + LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } else { + const Index m_blocks = (m + 127) / 128; + 
const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(8, 32, 1); + LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } + } + }; + + template + void evalTyped(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + EIGEN_UNUSED_VARIABLE(k) + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + setCudaSharedMemConfig(cudaSharedMemBankSizeEightByte); + LaunchKernels::Run(lhs, rhs, output, m, n, k, this->m_device); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_USE_GPU and __CUDACC__ +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h new file mode 100644 index 000000000..9b2cb3ff6 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -0,0 +1,467 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H + +namespace Eigen { + +namespace internal { + +enum { + Rhs = 0, + Lhs = 1 +}; + +/* + * Implementation of the Eigen blas_data_mapper class for tensors. + */ + +template struct CoeffLoader { + enum { + DirectOffsets = false + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) { + eigen_assert(false && "unsupported"); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename Tensor::PacketReturnType packet(typename Tensor::Index index) const + { + return m_tensor.template packet(index); + } + + + private: + const Tensor m_tensor; +}; + +template struct CoeffLoader { + enum { + DirectOffsets = true + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { + m_data += offset; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename Tensor::PacketReturnType packet(typename Tensor::Index index) const + { + return internal::ploadt_ro(m_data + index); + } + private: + typedef typename Tensor::Scalar Scalar; + const Scalar* m_data; +}; + +template +class SimpleTensorContractionMapper { + public: + EIGEN_DEVICE_FUNC + 
SimpleTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + m_tensor(tensor), + m_nocontract_strides(nocontract_strides), + m_ij_strides(ij_strides), + m_contract_strides(contract_strides), + m_k_strides(k_strides) { } + + enum { + DirectOffsets = CoeffLoader::DirectOffsets + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { + m_tensor.offsetBuffer(offset); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row) const { + // column major assumption + return operator()(row, 0); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { + return m_tensor.coeff(computeIndex(row, col)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { + const bool left = (side == Lhs); + Index nocontract_val = left ? row : col; + Index linidx = 0; + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = nocontract_val / m_ij_strides[i]; + linidx += idx * m_nocontract_strides[i]; + nocontract_val -= idx * m_ij_strides[i]; + } + if (array_size::value > array_size::value) { + if (side == Lhs && inner_dim_contiguous) { + eigen_assert(m_nocontract_strides[0] == 1); + linidx += nocontract_val; + } else { + linidx += nocontract_val * m_nocontract_strides[0]; + } + } + + Index contract_val = left ? 
col : row; + if(array_size::value > 0) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = contract_val / m_k_strides[i]; + linidx += idx * m_contract_strides[i]; + contract_val -= idx * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx += contract_val; + } else { + linidx += contract_val * m_contract_strides[0]; + } + } + + return linidx; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE IndexPair computeIndexPair(Index row, Index col, const Index distance) const { + const bool left = (side == Lhs); + Index nocontract_val[2] = {left ? row : col, left ? row + distance : col}; + Index linidx[2] = {0, 0}; + if (array_size::value > array_size::value) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = nocontract_val[0] / m_ij_strides[i]; + const Index idx1 = nocontract_val[1] / m_ij_strides[i]; + linidx[0] += idx0 * m_nocontract_strides[i]; + linidx[1] += idx1 * m_nocontract_strides[i]; + nocontract_val[0] -= idx0 * m_ij_strides[i]; + nocontract_val[1] -= idx1 * m_ij_strides[i]; + } + if (side == Lhs && inner_dim_contiguous) { + eigen_assert(m_nocontract_strides[0] == 1); + linidx[0] += nocontract_val[0]; + linidx[1] += nocontract_val[1]; + } else { + linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; + linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; + } + } + + Index contract_val[2] = {left ? col : row, left ? 
col : row + distance}; + if (array_size::value> 0) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = contract_val[0] / m_k_strides[i]; + const Index idx1 = contract_val[1] / m_k_strides[i]; + linidx[0] += idx0 * m_contract_strides[i]; + linidx[1] += idx1 * m_contract_strides[i]; + contract_val[0] -= idx0 * m_k_strides[i]; + contract_val[1] -= idx1 * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx[0] += contract_val[0]; + linidx[1] += contract_val[1]; + } else { + linidx[0] += contract_val[0] * m_contract_strides[0]; + linidx[1] += contract_val[1] * m_contract_strides[0]; + } + } + return IndexPair(linidx[0], linidx[1]); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const { + // Only claim alignment when we can compute the actual stride (ie when we're + // dealing with the lhs with inner_dim_contiguous. This is because the + // matrix-vector product relies on the stride when dealing with aligned inputs. + return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { + return ((side == Lhs) && inner_dim_contiguous && array_size::value > 0) ? 
m_contract_strides[0] : 1; + } + + protected: + CoeffLoader m_tensor; + const nocontract_t m_nocontract_strides; + const nocontract_t m_ij_strides; + const contract_t m_contract_strides; + const contract_t m_k_strides; +}; + + +template +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ + public: + typedef SimpleTensorContractionMapper ParentMapper; + + EIGEN_DEVICE_FUNC + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + typedef typename Tensor::PacketReturnType Packet; + typedef typename unpacket_traits::half HalfPacket; + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { + // whole method makes column major assumption + + // don't need to add offsets for now (because operator handles that) + // current code assumes packet size must be a multiple of 2 + EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + + if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { + const Index index = this->computeIndex(i, j); + eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); + return this->m_tensor.template packet(index); + } + + const IndexPair indexPair = this->computeIndexPair(i, j, packet_size - 1); + const Index first = indexPair.first; + const Index last = indexPair.second; + + // We can always do optimized packet reads from left hand side right now, because + // the vertical matrix dimension on the left hand side is never contracting. + // On the right hand side we need to check if the contracting dimensions may have + // been shuffled first. 
+ if (Tensor::PacketAccess && + (side == Lhs || internal::array_size::value <= 1 || !inner_dim_reordered) && + (last - first) == (packet_size - 1)) { + + return this->m_tensor.template packet(first); + } + + EIGEN_ALIGN_MAX Scalar data[packet_size]; + + data[0] = this->m_tensor.coeff(first); + for (Index k = 1; k < packet_size - 1; k += 2) { + const IndexPair internal_pair = this->computeIndexPair(i + k, j, 1); + data[k] = this->m_tensor.coeff(internal_pair.first); + data[k + 1] = this->m_tensor.coeff(internal_pair.second); + } + data[packet_size - 1] = this->m_tensor.coeff(last); + + return pload(data); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { + // whole method makes column major assumption + + // don't need to add offsets for now (because operator handles that) + const Index half_packet_size = unpacket_traits::size; + if (half_packet_size == packet_size) { + return loadPacket(i, j); + } + EIGEN_ALIGN_MAX Scalar data[half_packet_size]; + for (Index k = 0; k < half_packet_size; k++) { + data[k] = operator()(i + k, j); + } + return pload(data); + } +}; + + +template +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ + public: + typedef SimpleTensorContractionMapper ParentMapper; + + EIGEN_DEVICE_FUNC + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + typedef typename Tensor::PacketReturnType Packet; + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { + EIGEN_ALIGN_MAX Scalar data[1]; + data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); + return pload(data); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { + return loadPacket(i, j); + 
} +}; + + +template +class TensorContractionSubMapper { + public: + typedef typename Tensor::PacketReturnType Packet; + typedef typename unpacket_traits::half HalfPacket; + + typedef BaseTensorContractionMapper ParentMapper; + typedef TensorContractionSubMapper Self; + typedef Self LinearMapper; + + enum { + // We can use direct offsets iff the parent mapper supports them and we can compute the strides. + // TODO: we should also enable direct offsets for the Rhs case. + UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size::value > 0) + }; + + EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { + // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute + // this offset every time we attempt to access a coefficient. + if (UseDirectOffsets) { + Index stride = m_base_mapper.stride(); + m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride); + } + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper(i, 0); + } + return m_base_mapper(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper(i, j); + } + return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, 0); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, j); + } + return m_base_mapper.template loadPacket(i + 
m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadHalfPacket(i, 0); + } + return m_base_mapper.template loadHalfPacket(i + m_vert_offset, m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { + if (UseDirectOffsets) { + // The offsets are already baked into the base mapper's buffer: store once and stop, + // mirroring the early return used by the other accessors of this class. + m_base_mapper.storePacket(i, 0, p); + return; + } + m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + if (UseDirectOffsets) { + return LinearMapper(m_base_mapper, i, j); + } + return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { + EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? 
Aligned : Unaligned; + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, 0); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const { + return false; + } + + private: + ParentMapper m_base_mapper; + const Index m_vert_offset; + const Index m_horiz_offset; +}; + + +template +class TensorContractionInputMapper + : public BaseTensorContractionMapper { + + public: + typedef Scalar_ Scalar; + typedef BaseTensorContractionMapper Base; + typedef TensorContractionSubMapper SubMapper; + typedef SubMapper VectorMapper; + + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) + : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(*this, i, j); + } +}; + + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h new file mode 100644 index 000000000..ee16cde9b --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -0,0 +1,1052 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H + +// evaluator for thread pool device +#ifdef EIGEN_USE_THREADS + +namespace Eigen { + +#ifdef EIGEN_USE_SIMPLE_THREAD_POOL +namespace internal { + +template +struct packLhsArg { + LhsScalar* blockA; + const LhsMapper& lhs; + const Index m_start; + const Index k_start; + const Index mc; + const Index kc; +}; + +template +struct packRhsAndKernelArg { + const MaxSizeVector* blockAs; + RhsScalar* blockB; + const RhsMapper& rhs; + OutputMapper& output; + const Index m; + const Index k; + const Index n; + const Index mc; + const Index kc; + const Index nc; + const Index num_threads; + const Index num_blockAs; + const Index max_m; + const Index k_block_idx; + const Index m_block_idx; + const Index n_block_idx; + const Index m_blocks; + const Index n_blocks; + MaxSizeVector* kernel_notifications; + const MaxSizeVector* lhs_notifications; + const bool need_to_pack; +}; + +} // end namespace internal +#endif // EIGEN_USE_SIMPLE_THREAD_POOL + +template +struct TensorEvaluator, ThreadPoolDevice> : + public TensorContractionEvaluatorBase, ThreadPoolDevice> > { + + typedef ThreadPoolDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout, + }; + + // Most of the code is assuming that both input tensors are ColMajor. 
If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + // typedefs needed in evalTo + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef typename internal::gebp_traits Traits; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) {} + +#ifndef EIGEN_USE_SIMPLE_THREAD_POOL + template + void evalProduct(Scalar* buffer) const { + typedef + typename internal::remove_const::type + LhsScalar; + typedef + typename internal::remove_const::type + RhsScalar; + typedef typename internal::gebp_traits Traits; + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + typedef internal::TensorContractionInputMapper< + LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, + contract_t, internal::packet_traits::size, + lhs_inner_dim_contiguous, false, Unaligned> + LhsMapper; + typedef internal::TensorContractionInputMapper< + RhsScalar, Index, internal::Rhs, RightEvaluator, 
right_nocontract_t, + contract_t, internal::packet_traits::size, + rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> + RhsMapper; + typedef internal::blas_data_mapper OutputMapper; + typedef internal::gemm_pack_lhs + LhsPacker; + typedef internal::gemm_pack_rhs< + RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> + RhsPacker; + typedef internal::gebp_kernel + GebpKernel; + + const Index m = this->m_i_size; + const Index n = this->m_j_size; + const Index k = this->m_k_size; + if (m == 0 || n == 0 || k == 0) return; + + // Compute a set of algorithm parameters: + // - kernel block sizes (bm, bn, bk) + // - task grain sizes (number of kernels executed per task: gm, gn) + // - number of threads + // - sharding by row/column + // - parallel packing or first lhs then rhs + // and some derived parameters: + // - number of tasks (nm, nn, nk) + // - number of kernels (nm0, nn0) + // Unfortunately, all these parameters are tightly interdependent. + // So in some cases we first compute approximate values, then compute other + // values based on these approximations and then refine the approximations. + + // There are lots of heuristics here. There is some reasoning behind them, + // but ultimately they are just tuned on contraction benchmarks for + // different input configurations, thread counts and instruction sets. + // So feel free to question any of them. + + // Compute whether we want to shard by row or by column. + // This is a first approximation, it will be refined later. Since we don't + // know number of threads yet we use 2, because what's we are most + // interested in at this point is whether it makes sense to use + // parallelization at all or not. + bool shard_by_col = shardByCol(m, n, 2); + + // First approximation of kernel blocking sizes. + // Again, we don't know number of threads yet, so we use 2. 
+ Index bm, bn, bk; + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Compute optimal number of threads. + // Note: we use bk instead of k here because we are interested in amount of + // _parallelizable_ computations, and computations are not parallelizable + // across k dimension. + const TensorOpCost cost = + contractionCost(m, n, bm, bn, bk, shard_by_col, false); + int num_threads = TensorCostModel::numThreads( + static_cast(n) * m, cost, this->m_device.numThreads()); + + // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost + // model is not tuned. Remove this when the cost model is tuned. + if (n == 1) num_threads = 1; + + if (num_threads == 1) { + // The single-threaded algorithm should be faster in this case. + if (n == 1) + this->template evalGemv(buffer); + else + this->template evalGemm(buffer); + return; + } + + // Now that we know number of threads, recalculate sharding and blocking. + shard_by_col = shardByCol(m, n, num_threads); + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Number of kernels for each dimension. + Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index nk = divup(k, bk); + + // Calculate task grain size (number of kernels executed per task). + // This task size coarsening serves two purposes: + // 1. It reduces per-task overheads including synchronization overheads. + // 2. It allows to use caches better (reuse the same packed rhs in several + // consecutive kernels). 
+ Index gm = 1; + Index gn = 1; + // If we are sharding by column, then we prefer to reduce rows first. + if (shard_by_col) { + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + } else { + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + } + // Number of tasks in each dimension. + Index nm = divup(nm0, gm); + Index nn = divup(nn0, gn); + + // Last by not least, decide whether we want to issue both lhs and rhs + // packing in parallel; or issue lhs packing first, and then issue rhs + // packing when lhs packing completes (for !shard_by_col lhs and rhs are + // swapped). Parallel packing allows more parallelism (for both packing and + // kernels), while sequential packing provides better locality (once + // a thread finishes rhs packing it proceed to kernels with that rhs). + // First, we are interested in parallel packing if there are few tasks. + bool parallel_pack = num_threads >= nm * nn; + // Also do parallel packing if all data fits into L2$. + if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <= + l2CacheSize() * num_threads) + parallel_pack = true; + // But don't do it if we will use each rhs only once. Locality seems to be + // more important in this case. + if ((shard_by_col ? nm : nn) == 1) parallel_pack = false; + + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, + this->m_i_strides, this->m_left_contracting_strides, + this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, + this->m_j_strides, this->m_right_contracting_strides, + this->m_k_strides); + + Context(this->m_device, num_threads, lhs, rhs, buffer, m, n, + k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0, + shard_by_col, parallel_pack) + .run(); + } + + // Context coordinates a single parallel gemm operation. 
+ template + class Context { + public: + Context(const Device& device, int num_threads, LhsMapper& lhs, + RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm, + Index bn, Index bk, Index nm, Index nn, Index nk, Index gm, + Index gn, Index nm0, Index nn0, bool shard_by_col, + bool parallel_pack) + : device_(device), + lhs_(lhs), + rhs_(rhs), + buffer_(buffer), + output_(buffer, tm), + num_threads_(num_threads), + shard_by_col_(shard_by_col), + parallel_pack_(parallel_pack), + m_(tm), + n_(tn), + k_(tk), + bm_(bm), + bn_(bn), + bk_(bk), + nm_(nm), + nn_(nn), + nk_(nk), + gm_(gm), + gn_(gn), + nm0_(nm0), + nn0_(nn0) + { + for (Index x = 0; x < P; x++) { + // Normal number of notifications for k slice switch is + // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only + // nm_ + nn_ notifications, because they will not receive notifications + // from preceeding kernels. + state_switch_[x] = + x == 0 + ? 1 + : (parallel_pack_ ? nn_ + nm_ : (shard_by_col_ ? nn_ : nm_)) + + (x == P - 1 ? nm_ * nn_ : 0); + state_packing_ready_[x] = + parallel_pack_ ? 0 : (shard_by_col_ ? nm_ : nn_); + state_kernel_[x] = new std::atomic*[nm_]; + for (Index m = 0; m < nm_; m++) { + state_kernel_[x][m] = new std::atomic[nn_]; + // Kernels generally receive 3 notifications (previous kernel + 2 + // packing), but the first slice won't get notifications from previous + // kernels. + for (Index n = 0; n < nn_; n++) + state_kernel_[x][m][n].store( + (x == 0 ? 0 : 1) + (parallel_pack_ ? 2 : 1), + std::memory_order_relaxed); + } + } + + // Allocate memory for packed rhs/lhs matrices. 
+ size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); + size_t lhs_size = + divup(bm_ * bk_ * sizeof(LhsScalar), align) * align; + size_t rhs_size = + divup(bn_ * bk_ * sizeof(RhsScalar), align) * align; + packed_mem_ = static_cast(internal::aligned_malloc( + (nm0_ * lhs_size + nn0_ * rhs_size) * std::min(nk_, P - 1))); + char* mem = static_cast(packed_mem_); + for (Index x = 0; x < numext::mini(nk_, P - 1); x++) { + packed_lhs_[x].resize(nm0_); + for (Index m = 0; m < nm0_; m++) { + packed_lhs_[x][m] = reinterpret_cast(mem); + mem += lhs_size; + } + packed_rhs_[x].resize(nn0_); + for (Index n = 0; n < nn0_; n++) { + packed_rhs_[x][n] = reinterpret_cast(mem); + mem += rhs_size; + } + } + } + + ~Context() { + for (Index x = 0; x < P; x++) { + for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; + delete[] state_kernel_[x]; + } + internal::aligned_free(packed_mem_); + } + + void run() { + // Kick off packing of the first slice. + signal_switch(0, 1); + // Wait for overall completion. + // TODO(dvyukov): this wait can lead to deadlock. + // If nthreads contractions are concurrently submitted from worker + // threads, this wait will block all worker threads and the system will + // deadlock. + done_.Wait(); + } + + private: + Notification done_; + const Device& device_; + LhsMapper& lhs_; + RhsMapper& rhs_; + Scalar* const buffer_; + OutputMapper output_; + const int num_threads_; + const bool shard_by_col_; + const bool parallel_pack_; + // Matrix sizes. + const Index m_; + const Index n_; + const Index k_; + // Block sizes. + const Index bm_; + const Index bn_; + const Index bk_; + // Number of tasks. + const Index nm_; + const Index nn_; + const Index nk_; + // Task grain sizes (number of kernels executed per task). + const Index gm_; + const Index gn_; + // Number of blocks (this is different from ni_/nn_ because of task size + // coarsening). + const Index nm0_; + const Index nn0_; + + // Parallelization strategy. 
+ // + // Blocks related to the same k block can run in parallel because they write + // to different output blocks. So we parallelize within k slices, this + // gives us parallelism level of m x n. Before we can start any kernels + // related to k-th slice, we need to issue m lhs packing tasks and n rhs + // packing tasks. + // + // However, there is a bottleneck when we are finishing kernels for k-th + // slice (at the very end there is only 1 runnable kernel). To mitigate this + // bottleneck we allow kernels from k-th and k+1-th slices to run in + // parallel. Note that (m, n, k) and (m, n, k+1) kernels write to the same + // output block, so they must not run in parallel. + // + // This gives us the following dependency graph. + // On each k slice we have m x n kernel tasks, m lhs paking tasks and n rhs + // packing tasks. + // Kernel (m, n, k) can start when: + // - kernel (m, n, k-1) has finished + // - lhs packing (m, k) has finished + // - rhs packing (n, k) has finished + // Lhs/rhs packing can start when: + // - all k-1 packing has finished (artificially imposed to limit amount of + // parallel packing) + // + // On top of that we limit runnable tasks to two consecutive k slices. + // This is done to limit amount of memory we need for packed lhs/rhs + // (for each k slice we need m*bk + n*bk memory in packed_lhs_/packed_rhs_). + // + // state_switch_ tracks when we are ready to switch to the next k slice. + // state_kernel_[m][n] tracks when we are ready to kick off kernel (m, n). + // These variable are rolling over 3 consecutive k slices: first two we are + // actively executing + one to track completion of kernels in the second + // slice. + static const Index P = 3; + void* packed_mem_; + std::vector packed_lhs_[P - 1]; + std::vector packed_rhs_[P - 1]; + std::atomic** state_kernel_[P]; + // state_switch_ is frequently modified by worker threads, while other + // fields are read-only after constructor. 
Let's move it to a separate cache + // line to reduce cache-coherency traffic. + char pad_[128]; + std::atomic state_packing_ready_[P]; + std::atomic state_switch_[P]; + + void pack_lhs(Index m, Index k) { + const Index mend = m * gm_ + gm(m); + for (Index m1 = m * gm_; m1 < mend; m1++) + LhsPacker()(packed_lhs_[k % (P - 1)][m1], + lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1)); + + if (!parallel_pack_ && shard_by_col_) { + signal_packing(k); + } else { + signal_switch(k + 1); + for (Index n = nn_ - 1; n >= 0; n--) signal_kernel(m, n, k, n == 0); + } + } + + void pack_rhs(Index n, Index k) { + const Index nend = n * gn_ + gn(n); + for (Index n1 = n * gn_; n1 < nend; n1++) { + if (k == 0) { + // Zero the output memory in parallel. + // On 10000x2x10000 mm zeroing can easily take half of time. + // Zero (bn x m) row. Safe to do here because all kernels that will + // write to this memory depend on completion of this task. + // Note: don't call device_.memset() here. device_.memset() blocks on + // thread pool worker thread, which can lead to underutilization and + // deadlocks. + memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar)); + } + RhsPacker()(packed_rhs_[k % (P - 1)][n1], + rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1)); + } + + if (parallel_pack_ || shard_by_col_) { + signal_switch(k + 1); + for (Index m = nm_ - 1; m >= 0; m--) signal_kernel(m, n, k, m == 0); + } else { + signal_packing(k); + } + } + + void kernel(Index m, Index n, Index k) { + // Note: order of iteration matters here. Iteration over m is innermost + // because we want to reuse the same packed rhs in consequetive tasks + // (rhs fits into L2$ while lhs only into L3$). 
+ const Index nend = n * gn_ + gn(n); + const Index mend = m * gm_ + gm(m); + if (shard_by_col_) { + for (Index n1 = n * gn_; n1 < nend; n1++) { + for (Index m1 = m * gm_; m1 < mend; m1++) + GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), + packed_lhs_[k % (P - 1)][m1], + packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), + Scalar(1), -1, -1, 0, 0); + } + } else { + for (Index m1 = m * gm_; m1 < mend; m1++) + for (Index n1 = n * gn_; n1 < nend; n1++) { + GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), + packed_lhs_[k % (P - 1)][m1], + packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), + Scalar(1), -1, -1, 0, 0); + } + } + signal_kernel(m, n, k + 1, false); + signal_switch(k + 2); + } + + void signal_packing(Index k) { + eigen_assert(!parallel_pack_); + Index s = state_packing_ready_[k % P].fetch_sub(1); + eigen_assert(s > 0); + if (s != 1) return; + state_packing_ready_[k % P] = shard_by_col_ ? nm_ : nn_; + enqueue_packing(k, shard_by_col_); + } + + void signal_kernel(Index m, Index n, Index k, bool sync) { + std::atomic* state = &state_kernel_[k % P][m][n]; + Index s = state->load(); + eigen_assert(s > 0); + if (s != 1 && state->fetch_sub(1) != 1) return; + state->store(parallel_pack_ ? 3 : 2, std::memory_order_relaxed); + if (sync) + kernel(m, n, k); + else + device_.enqueueNoNotification([=]() { kernel(m, n, k); }); + } + + void signal_switch(Index k, Index v = 1) { + Index s = state_switch_[k % P].fetch_sub(v); + eigen_assert(s >= v); + if (s != v) return; + + // Ready to switch to the next k slice. + // Reset counter for the next iteration. + state_switch_[k % P] = + (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) + + nm_ * nn_; + if (k < nk_) { + // Issue lhs/rhs packing. Their completion will in turn kick off + // kernels. 
+ if (parallel_pack_) { + enqueue_packing(k, !shard_by_col_); + enqueue_packing(k, shard_by_col_); + } else if (shard_by_col_) { + enqueue_packing(k, false); + } else { + enqueue_packing(k, true); + } + + // Termination handling. + // Because kernel completion signals k + 2 switch, we need to finish nk + // + 2 slices without issuing any tasks on nk + 1 slice. So here we + // pretend that all nk + 1 packing tasks just finish instantly; so that + // nk + 2 switch only waits for completion of nk kernels. + } else if (k == nk_) { + signal_switch(k + 1, + parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)); + } else { + done_.Notify(); + } + } + + // Enqueue all rhs/lhs packing for k-th slice. + void enqueue_packing(Index k, bool rhs) { + enqueue_packing_helper(0, rhs ? nn_ : nm_, k, rhs); + } + + void enqueue_packing_helper(Index start, Index end, Index k, bool rhs) { + if (end - start == 1) { + if (rhs) + pack_rhs(start, k); + else + pack_lhs(start, k); + } else { + Index mid = (start + end) / 2; + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(mid, end, k, rhs); }); + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(start, mid, k, rhs); }); + } + } + + // Block sizes with accounting for potentially incomplete last block. + Index bm(Index m) const { return m + 1 < nm0_ ? bm_ : m_ + bm_ - bm_ * nm0_; } + Index bn(Index n) const { return n + 1 < nn0_ ? bn_ : n_ + bn_ - bn_ * nn0_; } + Index bk(Index k) const { return k + 1 < nk_ ? bk_ : k_ + bk_ - bk_ * nk_; } + // Task grain sizes accounting for potentially incomplete last task. + Index gm(Index m) const { return m + 1 < nm_ ? gm_ : nm0_ + gm_ - gm_ * nm_; } + Index gn(Index n) const { return n + 1 < nn_ ? gn_ : nn0_ + gn_ - gn_ * nn_; } + + Context(const Context&) = delete; + void operator=(const Context&) = delete; + }; + + // Decide whether we want to shard m x n contraction by columns or by rows. 
+ static bool shardByCol(Index m, Index n, Index num_threads) { + // Note: we are comparing both n and m against Traits::nr, it is not + // a mistake. We are trying to figure out how both n and m will fit into + // the main sharding dimension. + + // Sharding by column is the default + // ... unless there is enough data for vectorization over rows + if (m / num_threads >= Traits::nr && + // and not enough data for vectorization over columns + (n / num_threads < Traits::nr || + // ... or barely enough data for vectorization over columns, + // but it is not evenly dividable across threads + (n / num_threads < 4 * Traits::nr && + (n % (num_threads * Traits::nr)) != 0 && + // ... and it is evenly dividable across threads for rows + ((m % (num_threads * Traits::nr)) == 0 || + // .. or it is not evenly dividable for both dimensions but + // there is much more data over rows so that corner effects are + // mitigated. + (m / n >= 6))))) + return false; + // Wait, or if matrices are just substantially prolonged over the other + // dimension. + if (n / num_threads < 16 * Traits::nr && m > n * 32) return false; + return true; + } + + Index coarsenM(Index m, Index n, Index bm, Index bn, Index bk, Index gn, + int num_threads, bool shard_by_col) const { + Index gm = 1; + Index gm1 = 1; + Index nm0 = divup(m, bm); + Index nm1 = nm0; + for (;;) { + // Find the next candidate for m grain size. It needs to result in + // different number of blocks. E.g. if we have 10 kernels, we want to try + // 5 and 10, but not 6, 7, 8 and 9. + while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++; + if (gm1 > nm0) break; + // Check the candidate. + int res = checkGrain(m, n, bm, bn, bk, gm1, gn, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nm1 = divup(nm0, gm1); + if (res == 0) continue; + // Commit new grain size. 
+ gm = gm1; + } + return gm; + } + + Index coarsenN(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + int num_threads, bool shard_by_col) const { + Index gn = 1; + Index gn1 = 1; + Index nn0 = divup(n, bn); + Index nn1 = nn0; + for (;;) { + while (gn1 <= nn0 && nn1 == divup(nn0, gn1)) gn1++; + if (gn1 > nn0) break; + int res = checkGrain(m, n, bm, bn, bk, gm, gn1, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nn1 = divup(nn0, gn1); + if (res == 0) continue; + gn = gn1; + } + return gn; + } + + // checkGrain checks whether grain (gm, gn) is suitable and is better than + // (oldgm, oldgn). + int checkGrain(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + Index gn, Index oldgm, Index oldgn, int num_threads, + bool shard_by_col) const { + const TensorOpCost cost = + contractionCost(bm * gm, bn * gn, bm, bn, bk, shard_by_col, true); + double taskSize = TensorCostModel::taskSize( + static_cast(bm) * gm * bn * gn, cost); + // If the task is too small, then we agree on it regardless of anything + // else. Otherwise synchronization overheads will dominate. + if (taskSize < 1) return 1; + // If it is too large, then we reject it and all larger tasks. + if (taskSize > 2) return -1; + // Now we are in presumably good task size range. + // The main deciding factor here is parallelism. Consider that we have 12 + // kernels and 4 threads. Grains of 2, 3 and 4 all yield good task sizes. + // But 2/4 yield 6/3 tasks, which gives us parallelism of 0.75 (at most 3/4 + // of cores will be busy). While grain size 3 gives us 4 tasks, which gives + // us parallelism of 1 (we can load all cores). 
+ Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index new_tasks = divup(nm0, gm) * divup(nn0, gn); + double new_parallelism = static_cast(new_tasks) / + (divup(new_tasks, num_threads) * num_threads); + Index old_tasks = divup(nm0, oldgm) * divup(nn0, oldgn); + double old_parallelism = static_cast(old_tasks) / + (divup(old_tasks, num_threads) * num_threads); + if (new_parallelism > old_parallelism || new_parallelism == 1) return 1; + return 0; + } + +#else // EIGEN_USE_SIMPLE_THREAD_POOL + + template + void evalProduct(Scalar* buffer) const { + if (this->m_j_size == 1) { + this->template evalGemv(buffer); + return; + } + + evalGemm(buffer); + } + + template + void evalGemm(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + + const int lhs_packet_size = internal::unpacket_traits::size; + const int rhs_packet_size = internal::unpacket_traits::size; + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + // TODO: packing could be faster sometimes if we supported row major tensor mappers + typedef internal::gemm_pack_lhs LhsPacker; + typedef internal::gemm_pack_rhs RhsPacker; + + // TODO: replace false, false with conjugate values? 
+ typedef internal::gebp_kernel GebpKernel; + + typedef internal::packLhsArg packLArg; + typedef internal::packRhsAndKernelArg packRKArg; + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + // compute block sizes (which depend on number of threads) + const Index num_threads = this->m_device.numThreads(); + internal::TensorContractionBlocking blocking(k, m, n, num_threads); + Index mc = blocking.mc(); + Index nc = blocking.nc(); + Index kc = blocking.kc(); + eigen_assert(mc <= m); + eigen_assert(nc <= n); + eigen_assert(kc <= k); + +#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) + const Index k_blocks = CEIL_DIV(k, kc); + const Index n_blocks = CEIL_DIV(n, nc); + const Index m_blocks = CEIL_DIV(m, mc); + const Index sizeA = mc * kc; + const Index sizeB = kc * nc; + + /* cout << "m: " << m << " n: " << n << " k: " << k << endl; + cout << "mc: " << mc << " nc: " << nc << " kc: " << kc << endl; + cout << "m_blocks: " << m_blocks << " n_blocks: " << n_blocks << " k_blocks: " << k_blocks << endl; + cout << "num threads: " << num_threads << endl; + */ + + // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB + // aren't 16 byte aligned segfaults will happen due to SIMD instructions + // note: You can get away with allocating just a single blockA and offsets and meet the + // the alignment requirements with the assumption that + // (Traits::mr * sizeof(ResScalar)) % 16 == 0 + const Index numBlockAs = numext::mini(num_threads, m_blocks); + MaxSizeVector blockAs(num_threads); + for (int i = 0; i < num_threads; i++) { + blockAs.push_back(static_cast(this->m_device.allocate(sizeA * sizeof(LhsScalar)))); + } + + // To circumvent alignment 
issues, I'm just going to separately allocate the memory for each thread + // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful. + // Other options: (1) reuse memory when a thread finishes. con: tricky + // (2) allocate block B memory in each thread. con: overhead + MaxSizeVector blockBs(n_blocks); + for (int i = 0; i < n_blocks; i++) { + blockBs.push_back(static_cast(this->m_device.allocate(sizeB * sizeof(RhsScalar)))); + } + + // lhs_notifications starts with all null Notifications + MaxSizeVector lhs_notifications(num_threads, nullptr); + + // this should really be numBlockAs * n_blocks; + const Index num_kernel_notifications = num_threads * n_blocks; + MaxSizeVector kernel_notifications(num_kernel_notifications, + nullptr); + + for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) { + const Index k_start = k_block_idx * kc; + // make sure we don't overshoot right edge of left matrix + const Index actual_kc = numext::mini(k_start + kc, k) - k_start; + + for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) { + const Index num_blocks = numext::mini(m_blocks-m_block_idx, numBlockAs); + + for (Index mt_block_idx = m_block_idx; mt_block_idx < m_block_idx+num_blocks; mt_block_idx++) { + const Index m_start = mt_block_idx * mc; + const Index actual_mc = numext::mini(m_start + mc, m) - m_start; + eigen_assert(actual_mc > 0); + + Index blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads; + + for (int i = 0; i < n_blocks; ++i) { + Index notification_id = (blockAId * n_blocks + i); + // Wait for any current kernels using this slot to complete + // before using it. 
+ if (kernel_notifications[notification_id]) { + wait_until_ready(kernel_notifications[notification_id]); + delete kernel_notifications[notification_id]; + } + kernel_notifications[notification_id] = new Notification(); + } + const packLArg arg = { + blockAs[blockAId], // blockA + lhs, // lhs + m_start, // m + k_start, // k + actual_mc, // mc + actual_kc, // kc + }; + + // Delete any existing notification since we may be + // replacing it. The algorithm should ensure that there are + // no existing waiters on this notification. + delete lhs_notifications[blockAId]; + lhs_notifications[blockAId] = + this->m_device.enqueue(&Self::packLhs, arg); + } + + // now start kernels. + const Index m_base_start = m_block_idx * mc; + const bool need_to_pack = m_block_idx == 0; + + for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx++) { + const Index n_start = n_block_idx * nc; + const Index actual_nc = numext::mini(n_start + nc, n) - n_start; + + // first make sure the previous kernels are all done before overwriting rhs. Also wait if + // we're going to start new k. In both cases need_to_pack is true. 
+ if (need_to_pack) { + for (Index i = num_blocks; i < num_threads; ++i) { + Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads; + Index future_id = (blockAId * n_blocks + n_block_idx); + wait_until_ready(kernel_notifications[future_id]); + } + } + + packRKArg arg = { + &blockAs, // blockA + blockBs[n_block_idx], // blockB + rhs, // rhs + output, // output + m_base_start, // m + k_start, // k + n_start, // n + mc, // mc + actual_kc, // kc + actual_nc, // nc + num_threads, + numBlockAs, + m, + k_block_idx, + m_block_idx, + n_block_idx, // n_block_idx + m_blocks, // m_blocks + n_blocks, // n_blocks + &kernel_notifications, // kernel notifications + &lhs_notifications, // lhs notifications + need_to_pack, // need_to_pack + }; + + // We asynchronously kick off this function, which ends up + // notifying the appropriate kernel_notifications objects, + // which this thread waits on before exiting. + this->m_device.enqueueNoNotification(&Self::packRhsAndKernel, arg); + } + } + } + + // Make sure all the kernels are done. + for (size_t i = 0; i < kernel_notifications.size(); ++i) { + wait_until_ready(kernel_notifications[i]); + delete kernel_notifications[i]; + } + + // No need to wait for lhs notifications since they should have + // already been waited on. Just clean them up. + for (size_t i = 0; i < lhs_notifications.size(); ++i) { + delete lhs_notifications[i]; + } + + // deallocate all of the memory for both A and B's + for (size_t i = 0; i < blockAs.size(); i++) { + this->m_device.deallocate(blockAs[i]); + } + for (size_t i = 0; i < blockBs.size(); i++) { + this->m_device.deallocate(blockBs[i]); + } + +#undef CEIL_DIV + } + + /* + * Packs a LHS block of size (mt, kc) starting at lhs(m, k). Before packing + * the LHS block, check that all of the kernels that worked on the same + * mt_block_idx in the previous m_block are done. 
+ */ + template + static void packLhs(const packLArg arg) { + // perform actual packing + LhsPacker pack_lhs; + pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m_start, arg.k_start), arg.kc, arg.mc); + } + + /* + * Packs a RHS block of size (kc, nc) starting at (k, n) after checking that + * all kernels in the previous block are done. + * Then for each LHS future, we wait on the future and then call GEBP + * on the area packed by the future (which starts at + * blockA + future_idx * mt * kc) on the LHS and with the full packed + * RHS block. + * The output of this GEBP is written to output(m + i * mt, n). + */ + template + static void packRhsAndKernel(packRKArg arg) { + if (arg.need_to_pack) { + RhsPacker pack_rhs; + pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k, arg.n), arg.kc, arg.nc); + } + + GebpKernel gebp; + for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) { + const Index m_base_start = arg.m + arg.mc*mt_block_idx; + if (m_base_start < arg.max_m) { + Index blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads; + wait_until_ready((*arg.lhs_notifications)[blockAId]); + const Index actual_mc = numext::mini(m_base_start + arg.mc, arg.max_m) - m_base_start; + gebp(arg.output.getSubMapper(m_base_start, arg.n), + (*arg.blockAs)[blockAId], arg.blockB, + actual_mc, arg.kc, arg.nc, Scalar(1), -1, -1, 0, 0); + + // Notify that the kernel is done. + const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx; + (*arg.kernel_notifications)[set_idx]->Notify(); + } + } + } +#endif // EIGEN_USE_SIMPLE_THREAD_POOL + + TensorOpCost contractionCost(Index m, Index n, Index bm, Index bn, Index bk, + bool shard_by_col, bool prepacked) const { + const int packed_size = std::min(PacketType::size, + PacketType::size); + const int output_packet_size = internal::unpacket_traits::size; + const double kd = static_cast(bk); + // Peak VFMA bandwidth is 0.5. 
However if we have not enough data for + // vectorization bandwidth drops. The 4.0 and 2.0 bandwidth is determined + // experimentally. + double computeBandwidth = bk == 1 ? 4.0 : + (shard_by_col ? bn : bm) < Traits::nr || + (shard_by_col ? bm : bn) < Traits::mr ? 2.0 : 0.5; +#ifndef EIGEN_VECTORIZE_FMA + // Bandwidth of all of VFMA/MULPS/ADDPS is 0.5 on latest Intel processors. + // However for MULPS/ADDPS we have dependent sequence of 2 such instructions, + // so overall bandwidth is 1.0. + if (computeBandwidth == 0.5) computeBandwidth = 1.0; +#endif + // Computations. + TensorOpCost cost = TensorOpCost(0, 0, kd * computeBandwidth, true, packed_size); + // Output stores. + cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size); + if (prepacked) { + // Packing and kernels are executed in different tasks. When we calculate + // task grain size we look only at kernel cost assuming that kernel + // is more expensive than packing. + return cost; + } + // Lhs/rhs loads + computations. + TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * (kd / n); + TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * (kd / m); + // Lhs packing memory cost does not contribute considerably to overall + // execution time because lhs is prefetched early and accessed sequentially. + if (shard_by_col) + lhsCost.dropMemoryCost(); + else + rhsCost.dropMemoryCost(); + return cost + lhsCost + rhsCost; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_USE_THREADS +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h new file mode 100644 index 000000000..860a6949a --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -0,0 +1,279 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H + +namespace Eigen { + +/** \class TensorConversionOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor conversion class. This class makes it possible to vectorize + * type casting operations when the number of scalars per packet in the source + * and the destination type differ + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef TargetType Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + enum { Flags = 0 }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConversionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConversionOp type; +}; + +} // end namespace internal + + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + return internal::pcast(m_impl.template packet(index)); + } + + private: + const TensorEvaluator& m_impl; +}; + + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + 
SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + SrcPacketSize); + SrcPacket src3 = m_impl.template packet(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet(index + 3 * SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2, src3, src4); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + // Only call m_impl.packet() when we have direct access to the underlying data. This + // ensures that we don't compute the subexpression twice. We may however load some + // coefficients twice, but in practice this doesn't negatively impact performance. 
+ if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) { + // Force unaligned memory loads since we can't ensure alignment anymore + return internal::pcast(m_impl.template packet(index)); + } else { + const int TgtPacketSize = internal::unpacket_traits::size; + typedef typename internal::unpacket_traits::type SrcType; + typedef typename internal::unpacket_traits::type TgtType; + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::unpacket_traits::type values[TgtPacketSize]; + for (int i = 0; i < TgtPacketSize; ++i) { + values[i] = converter(m_impl.coeff(index+i)); + } + TgtPacket rslt = internal::pload(values); + return rslt; + } + } + + private: + const TensorEvaluator& m_impl; + const typename TensorEvaluator::Index m_maxIndex; +}; + +template +class TensorConversionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::nested::type Nested; + typedef Scalar CoeffReturnType; + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) + : m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +template struct ConversionSubExprEval { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) { + impl.evalSubExprsIfNeeded(NULL); + return true; + } +}; + +template struct ConversionSubExprEval { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) { + return impl.evalSubExprsIfNeeded(data); + } +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorConversionOp XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef TargetType 
Scalar; + typedef TargetType CoeffReturnType; + typedef typename internal::remove_all::Scalar>::type SrcType; + typedef typename PacketType::type PacketReturnType; + typedef typename PacketType::type PacketSourceType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = true, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) + { + return ConversionSubExprEval::value, TensorEvaluator, Scalar>::run(m_impl, data); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + internal::scalar_cast_op converter; + return converter(m_impl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const bool Vectorizable = TensorEvaluator::PacketAccess & + internal::type_casting_traits::VectorizedCast; + return PacketConv::run(m_impl, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double cast_cost = TensorOpCost::CastCost(); + if (vectorized) { + const double SrcCoeffRatio = + internal::type_casting_traits::SrcCoeffRatio; + const double TgtCoeffRatio = + internal::type_casting_traits::TgtCoeffRatio; + return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) + + TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize)); + } else { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost); + } + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + template + struct PacketConv 
{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator& impl, Index index) { + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = converter(impl.coeff(index+i)); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + }; + + template + struct PacketConv { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator& impl, Index index) { + const int SrcCoeffRatio = internal::type_casting_traits::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits::TgtCoeffRatio; + PacketConverter, PacketSourceType, PacketReturnType, + SrcCoeffRatio, TgtCoeffRatio> converter(impl); + return converter.template packet(index); + } + }; + + TensorEvaluator m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h new file mode 100644 index 000000000..abdf742c6 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -0,0 +1,1104 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H + +namespace Eigen { + +/** \class TensorConvolution + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor convolution class. 
+ * + * + */ +namespace internal { + +template +class IndexMapper { + public: + IndexMapper(const InputDims& input_dims, const array& kernel_dims, + const array& indices) { + + array dimensions = input_dims; + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = indices[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + dimensions[index] = result_dim; + } + + array inputStrides; + array outputStrides; + if (static_cast(Layout) == static_cast(ColMajor)) { + inputStrides[0] = 1; + outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + inputStrides[i] = inputStrides[i-1] * input_dims[i-1]; + outputStrides[i] = outputStrides[i-1] * dimensions[i-1]; + } + } else { + inputStrides[NumDims - 1] = 1; + outputStrides[NumDims - 1] = 1; + for (int i = static_cast(NumDims) - 2; i >= 0; --i) { + inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; + outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1]; + } + } + + array cudaInputDimensions; + array cudaOutputDimensions; + array tmp = dimensions; + array ordering; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = i + offset; + ordering[index] = indices[i]; + tmp[indices[i]] = -1; + cudaInputDimensions[index] = input_dims[indices[i]]; + cudaOutputDimensions[index] = dimensions[indices[i]]; + } + + int written = static_cast(Layout) == static_cast(ColMajor) + ? 
NumKernelDims + : 0; + for (int i = 0; i < NumDims; ++i) { + if (tmp[i] >= 0) { + ordering[written] = i; + cudaInputDimensions[written] = input_dims[i]; + cudaOutputDimensions[written] = dimensions[i]; + ++written; + } + } + + for (int i = 0; i < NumDims; ++i) { + m_inputStrides[i] = inputStrides[ordering[i]]; + m_outputStrides[i] = outputStrides[ordering[i]]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (i > NumKernelDims) { + m_cudaInputStrides[i] = + m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1]; + m_cudaOutputStrides[i] = + m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1]; + } else { + m_cudaInputStrides[i] = 1; + m_cudaOutputStrides[i] = 1; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (i + 1 < offset) { + m_cudaInputStrides[i] = + m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1]; + m_cudaOutputStrides[i] = + m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1]; + } else { + m_cudaInputStrides[i] = 1; + m_cudaOutputStrides[i] = 1; + } + } + } + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_cudaInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_cudaInputStrides[d]; + } + inputIndex += p * m_inputStrides[NumKernelDims]; + } else { + std::ptrdiff_t limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_cudaInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_cudaInputStrides[d]; + } + inputIndex += p * m_inputStrides[limit]; + } + return inputIndex; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const { + Index outputIndex = 0; + if (static_cast(Layout) == 
static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_cudaOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_cudaOutputStrides[d]; + } + outputIndex += p * m_outputStrides[NumKernelDims]; + } else { + std::ptrdiff_t limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_cudaOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_cudaOutputStrides[d]; + } + outputIndex += p * m_outputStrides[limit]; + } + return outputIndex; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 
0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] + + k * m_inputStrides[offset + 2]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] + + k * m_outputStrides[offset + 2]; + } + + private: + static const int NumDims = internal::array_size::value; + array m_inputStrides; + array m_outputStrides; + array m_cudaInputStrides; + array m_cudaOutputStrides; +}; + + + +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename promote_storage_type::ret Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename InputXprType::Nested LhsNested; + typedef typename KernelXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConvolutionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConvolutionOp type; +}; + +} // end namespace internal + + + +template +class TensorConvolutionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind 
StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims) + : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Indices& indices() const { return m_indices; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + inputExpression() const { return m_input_xpr; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + kernelExpression() const { return m_kernel_xpr; } + + protected: + typename InputXprType::Nested m_input_xpr; + typename KernelXprType::Nested m_kernel_xpr; + const Indices m_indices; +}; + + +template +struct TensorEvaluator, Device> +{ + typedef TensorConvolutionOp XprType; + + static const int NumDims = internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == 
static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; + } + } else { + m_inputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; + } + } + + m_dimensions = m_inputImpl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i > 0) { + m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; + } else { + m_kernelStride[0] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; + } + } else { + for (int i = NumKernelDims - 1; i >= 0; --i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i < NumKernelDims - 1) { + m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; + } else { + m_kernelStride[NumKernelDims - 1] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_inputImpl.evalSubExprsIfNeeded(NULL); + preloadKernel(); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_local_kernel) { + m_device.deallocate((void*)m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + + void evalTo(typename XprType::Scalar* buffer) { + evalSubExprsIfNeeded(NULL); + for (int i = 0; i < dimensions().TotalSize(); ++i) { + buffer[i] += coeff(i); + } + cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + CoeffReturnType result = CoeffReturnType(0); + convolve(firstInput(index), 0, NumKernelDims-1, result); + return result; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const + { + Index indices[2] = {index, index+PacketSize-1}; + Index startInputs[2] = {0, 0}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } + startInputs[0] += indices[0]; + startInputs[1] += indices[1]; + + if (startInputs[1]-startInputs[0] == PacketSize-1) { + PacketReturnType result = internal::pset1(0); + convolvePacket(startInputs[0], 0, NumKernelDims-1, result); + return result; + } else { + EIGEN_ALIGN_MAX Scalar data[PacketSize]; + data[0] = Scalar(0); + convolve(startInputs[0], 0, 
NumKernelDims-1, data[0]); + for (int i = 1; i < PacketSize-1; ++i) { + data[i] = Scalar(0); + convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]); + } + data[PacketSize-1] = Scalar(0); + convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]); + return internal::pload(data); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = + TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } + startInput += index; + return startInput; + } + + EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const { + for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { + const Index input = firstIndex + j * m_indexStride[DimIndex]; + const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; + if (DimIndex > 0) { + convolve(input, kernel, 
DimIndex-1, accum); + } else { + accum += m_inputImpl.coeff(input) * m_kernel[kernel]; + } + } + } + + template + EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const { + for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { + const Index input = firstIndex + j * m_indexStride[DimIndex]; + const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; + if (DimIndex > 0) { + convolvePacket(input, kernel, DimIndex-1, accum); + } else { + accum = internal::pmadd(m_inputImpl.template packet(input), internal::pset1(m_kernel[kernel]), accum); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. it's an + // expression that needs to be evaluated) + const Scalar* in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + Scalar* local = (Scalar*)m_device.allocate(kernel_sz); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(local, m_kernelArg); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + + m_kernel = local; + m_local_kernel = true; + } + } + + array m_inputStride; + array m_outputStride; + + array m_indexStride; + array m_kernelStride; + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; + Dimensions m_dimensions; + + KernelArgType m_kernelArg; + const Scalar* m_kernel; + bool m_local_kernel; + const Device& m_device; +}; + + + + +// Use an optimized implementation of the evaluation code for GPUs whenever possible. 
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + +template +struct GetKernelSize { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const { + return StaticKernelSize; + } +}; +template <> +struct GetKernelSize { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const { + return kernelSize; + } +}; + +template +__global__ void EigenConvolutionKernel1D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int kernelSize, float* buffer) { + extern __shared__ float s[]; + + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + GetKernelSize()(kernelSize); + const int num_x_output = last_x - first_x + 1; + + const int first_plane = blockIdx.y * blockDim.y; + const int plane_stride = blockDim.y * gridDim.y; + + for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) { + // Load inputs to shared memory + const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = threadIdx.y * num_x_input; + #pragma unroll + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x); + s[i + plane_kernel_offset] = eval.coeff(tensor_index); + } + + __syncthreads(); + + // Compute the convolution + const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); + + #pragma unroll + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + const int kernel_offset = plane_kernel_offset + i; + float result = 0.0f; + #pragma unroll + for (int k = 0; k < GetKernelSize()(kernelSize); ++k) { + result += s[k + kernel_offset] * kernel[k]; + } + const int tensor_index = plane_output_offset + 
indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x); + buffer[tensor_index] = result; + } + __syncthreads(); + } +}; + +template +__global__ void EigenConvolutionKernel2D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int numY, const int maxY, const int kernelSizeX, + const int kernelSizeY, float* buffer) { + extern __shared__ float s[]; + + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + GetKernelSize()(kernelSizeX); + const int num_x_output = last_x - first_x + 1; + + const int first_y = blockIdx.y * maxY; + const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; + const int num_y_input = last_y - first_y + GetKernelSize()(kernelSizeY); + const int num_y_output = last_y - first_y + 1; + + const int first_plane = blockIdx.z * blockDim.z; + const int plane_stride = blockDim.z * gridDim.z; + + for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) { + + const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = threadIdx.z * num_y_input; + + // Load inputs to shared memory + #pragma unroll + for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { + const int input_offset = num_x_input * (j + plane_kernel_offset); + #pragma unroll + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y); + s[i + input_offset] = eval.coeff(tensor_index); + } + } + + __syncthreads(); + + // Convolution + const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); + + #pragma unroll + for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { + #pragma unroll + for (int i = threadIdx.x; i < 
num_x_output; i += blockDim.x) { + float result = 0.0f; + #pragma unroll + for (int l = 0; l < GetKernelSize()(kernelSizeY); ++l) { + const int kernel_offset = kernelSizeX * l; + const int input_offset = i + num_x_input * (j + l + plane_kernel_offset); + #pragma unroll + for (int k = 0; k < GetKernelSize()(kernelSizeX); ++k) { + result += s[k + input_offset] * kernel[k + kernel_offset]; + } + } + const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y); + buffer[tensor_index] = result; + } + } + + __syncthreads(); + } +}; + +template +__global__ void EigenConvolutionKernel3D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const size_t numPlanes, const size_t numX, + const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, + const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, + const size_t kernelSizeZ, float* buffer) { + extern __shared__ float s[]; + + // Load inputs to shared memory + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + kernelSizeX; + + const int first_y = blockIdx.y * maxY; + const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; + const int num_y_input = last_y - first_y + kernelSizeY; + + const int first_z = blockIdx.z * maxZ; + const int last_z = (first_z + maxZ < numZ ? 
first_z + maxZ : numZ) - 1; + const int num_z_input = last_z - first_z + kernelSizeZ; + + for (int p = 0; p < numPlanes; ++p) { + + const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = 0; + + for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) { + for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z); + s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index); + } + } + } + + __syncthreads(); + + // Convolution + const int num_z_output = last_z - first_z + 1; + const int num_y_output = last_y - first_y + 1; + const int num_x_output = last_x - first_x + 1; + const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); + + for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) { + for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + float result = 0.0f; + for (int n = 0; n < kernelSizeZ; ++n) { + for (int m = 0; m < kernelSizeY; ++m) { + for (int l = 0; l < kernelSizeX; ++l) { + result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)]; + } + } + } + const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z); + buffer[tensor_index] = result; + } + } + } + __syncthreads(); + } +}; + + + +template +struct TensorEvaluator, GpuDevice> +{ + typedef TensorConvolutionOp XprType; + + static const int NumDims = internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename 
TensorEvaluator::Dimensions KernelDimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device) + : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + + m_dimensions = m_inputImpl.dimensions(); + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + } + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename InputArgType::Scalar Scalar; + static const int PacketSize = internal::unpacket_traits::size; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + preloadKernel(); + m_inputImpl.evalSubExprsIfNeeded(NULL); + if (data) { + executeEval(data); + return false; + } else { + m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)); + executeEval(m_buf); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_buf) { + m_device.deallocate(m_buf); + m_buf = NULL; + } + if (m_local_kernel) { + 
m_device.deallocate((void*)m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + + EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. it's an + // expression that needs to be evaluated) + const Scalar* in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + Scalar* local = (Scalar*)m_device.allocate(kernel_sz); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(local, m_kernelArg); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + + m_kernel = local; + m_local_kernel = true; + } + } + + static unsigned int ceil(unsigned int num, unsigned int denom) { + const unsigned int rounded_toward_zero = num / denom; + if (num > rounded_toward_zero * denom) { + return rounded_toward_zero + 1; + } + return rounded_toward_zero; + } + + void executeEval(Scalar* data) const { + typedef typename TensorEvaluator::Dimensions InputDims; + + const int maxSharedMem = m_device.sharedMemPerBlock(); + const int maxThreadsPerBlock = m_device.maxCudaThreadsPerBlock(); + const int maxBlocksPerProcessor = m_device.maxCudaThreadsPerMultiProcessor() / maxThreadsPerBlock; + const int numMultiProcessors = m_device.getNumCudaMultiProcessors(); + const int warpSize = 32; + + switch (NumKernelDims) { + case 1: { + const int kernel_size = m_kernelImpl.dimensions().TotalSize(); + + const int numX = dimensions()[m_indices[0]]; + const int numP = dimensions().TotalSize() / numX; + int maxX; + dim3 block_size; + + const int single_stride_dim = + static_cast(Layout) == static_cast(ColMajor) + ? 
0 + : m_inputImpl.dimensions().rank() - 1; + if (m_indices[0] == single_stride_dim) { + // Maximize the reuse + const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32; + maxX = numext::mini(inner_dim, numX); + const int maxP = numext::mini(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); + block_size.x = numext::mini(maxThreadsPerBlock, maxX); + block_size.y = numext::mini(maxThreadsPerBlock / block_size.x, maxP); + } + else { + // Read as much as possible along the innermost dimension, that is the plane + const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); + const int maxP = numext::mini(inner_dim, numP); + maxX = numext::mini(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); + + block_size.x = numext::mini(warpSize, maxX); + block_size.y = numext::mini(maxThreadsPerBlock/block_size.x, maxP); + } + + const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); + assert(shared_mem <= maxSharedMem); + + const int num_x_blocks = ceil(numX, maxX); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks); + + dim3 num_blocks(num_x_blocks, numext::mini(num_y_blocks, ceil(numP, block_size.y))); + + + //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + + const array indices(m_indices[0]); + const array kernel_dims(m_kernelImpl.dimensions()[0]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + switch(kernel_size) { + case 4: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, 
indexMapper, m_kernel, numP, numX, maxX, 4, data); + break; + } + case 7: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data); + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data); + } + } + break; + } + + case 2: { + const int idxX = + static_cast(Layout) == static_cast(ColMajor) ? 0 : 1; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 0; + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numP = dimensions().TotalSize() / (numX*numY); + + const float scaling_factor = sqrtf(static_cast(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x)); + + // Snap maxX to warp size + int inner_dim = ((static_cast(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32; + const int maxX = numext::mini(inner_dim, numX); + const int maxY = numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); + const int maxP = numext::mini(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); + + dim3 block_size; + block_size.x = numext::mini(1024, maxX); + block_size.y = numext::mini(1024/block_size.x, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxP); + + const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); + assert(shared_mem <= maxSharedMem); + + const int num_x_blocks = ceil(numX, maxX); + const int num_y_blocks = ceil(numY, maxY); + const int blocksPerProcessor = 
numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks); + + dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini(num_z_blocks, ceil(numP, block_size.z))); + + + //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + + const array indices(m_indices[idxX], m_indices[idxY]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + switch (kernel_size_x) { + case 4: { + switch (kernel_size_y) { + case 7: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data); + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data); + break; + } + } + break; + } + case 7: { + switch (kernel_size_y) { + case 4: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data); + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data); + break; + } + } + break; + } + default: { 
+ LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data); + break; + } + } + break; + } + + case 3: { + const int idxX = + static_cast(Layout) == static_cast(ColMajor) ? 0 : 2; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 1; + const int idxZ = + static_cast(Layout) == static_cast(ColMajor) ? 2 : 0; + + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + const int kernel_size_z = m_kernelImpl.dimensions()[idxZ]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numZ = dimensions()[m_indices[idxZ]]; + const int numP = dimensions().TotalSize() / (numX*numY*numZ); + + const int maxX = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); + const int maxY = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); + const int maxZ = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); + + dim3 block_size; + block_size.x = numext::mini(32, maxX); + block_size.y = numext::mini(32, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxZ); + dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); + + const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); + assert(shared_mem <= maxSharedMem); + + //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << 
num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + const array indices(m_indices[idxX], m_indices[idxY], + m_indices[idxZ]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY], + m_kernelImpl.dimensions()[idxZ]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data); + break; + } + + default: { + EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return m_buf[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return internal::ploadt(m_buf+index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost + // model. + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. 
+ const double convolve_compute_cost = + TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + + private: + // No assignment (copies are needed by the kernels) + TensorEvaluator& operator = (const TensorEvaluator&); + + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; + KernelArgType m_kernelArg; + Indices m_indices; + Dimensions m_dimensions; + Scalar* m_buf; + const Scalar* m_kernel; + bool m_local_kernel; + + const GpuDevice& m_device; +}; +#endif + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h new file mode 100644 index 000000000..83c449cf1 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -0,0 +1,212 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Rasmus Munk Larsen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H +#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H + +namespace Eigen { + +/** \class TensorEvaluator + * \ingroup CXX11_Tensor_Module + * + * \brief A cost model used to limit the number of threads used for evaluating + * tensor expression. 
+ * + */ + +// Class storing the cost of evaluating a tensor expression in terms of the +// estimated number of operand bytes loaded, bytes stored, and compute cycles. +class TensorOpCost { + public: + // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple + // model based on minimal reciprocal throughput numbers from Intel or + // Agner Fog's tables would be better than what is there now. + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() { + return internal::functor_traits< + internal::scalar_product_op >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() { + return internal::functor_traits >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() { + return internal::functor_traits< + internal::scalar_quotient_op >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() { + return internal::functor_traits >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() { + return internal::functor_traits< + internal::scalar_cast_op >::Cost; + } + + EIGEN_DEVICE_FUNC + TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} + EIGEN_DEVICE_FUNC + TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) + : bytes_loaded_(bytes_loaded), + bytes_stored_(bytes_stored), + compute_cycles_(compute_cycles) {} + + EIGEN_DEVICE_FUNC + TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, + bool vectorized, double packet_size) + : bytes_loaded_(bytes_loaded), + bytes_stored_(bytes_stored), + compute_cycles_(vectorized ? 
compute_cycles / packet_size + : compute_cycles) { /* when vectorized, the compute cycles are divided by packet_size; the byte counts are stored unscaled */ + eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded)); + eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored)); + eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const { + return bytes_loaded_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const { + return bytes_stored_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const { + return compute_cycles_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost( + double load_cost, double store_cost, double compute_cost) const { + return load_cost * bytes_loaded_ + store_cost * bytes_stored_ + + compute_cost * compute_cycles_; + } + + // Drop memory access component. Intended for cases when memory accesses are + // sequential or are completely masked by computations. + EIGEN_DEVICE_FUNC void dropMemoryCost() { + bytes_loaded_ = 0; + bytes_stored_ = 0; + } + + // TODO(rmlarsen): Define min in terms of total cost, not elementwise. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); + } + + // TODO(rmlarsen): Define max in terms of total cost, not elementwise. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( + const TensorOpCost& rhs) { + bytes_loaded_ += rhs.bytes_loaded(); + bytes_stored_ += rhs.bytes_stored(); + compute_cycles_ += rhs.compute_cycles(); + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator*=(double rhs) { + bytes_loaded_ *= rhs; + bytes_stored_ *= rhs; + compute_cycles_ *= rhs; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+( + TensorOpCost lhs, const TensorOpCost& rhs) { + lhs += rhs; + return lhs; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( + TensorOpCost lhs, double rhs) { + lhs *= rhs; + return lhs; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( + double lhs, TensorOpCost rhs) { + rhs *= lhs; + return rhs; + } + + friend std::ostream& operator<<(std::ostream& os, const TensorOpCost& tc) { + return os << "[bytes_loaded = " << tc.bytes_loaded() + << ", bytes_stored = " << tc.bytes_stored() + << ", compute_cycles = " << tc.compute_cycles() << "]"; + } + + private: + double bytes_loaded_; + double bytes_stored_; + double compute_cycles_; +}; + +// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of threads +// in [1:max_threads] instead of just switching multi-threading off for small +// work units. +template +class TensorCostModel { + public: + // Scaling from Eigen compute cost to device cycles. + static const int kDeviceCyclesPerComputeCycle = 1; + + // Costs in device cycles. 
+ static const int kStartupCycles = 100000; + static const int kPerThreadCycles = 100000; + static const int kTaskSize = 40000; + + // Returns the number of threads in [1:max_threads] to use for + // evaluating an expression with the given output size and cost per + // coefficient. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads( + double output_size, const TensorOpCost& cost_per_coeff, int max_threads) { + double cost = totalCost(output_size, cost_per_coeff); + int threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9; /* adding 0.9 before the truncating conversion to int biases the quotient upward; the result is clamped to [1, max_threads] below */ + return numext::mini(max_threads, numext::maxi(1, threads)); + } + + // taskSize assesses parallel task size. + // Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task + // granularity needs to be increased to mitigate parallelization overheads. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize( + double output_size, const TensorOpCost& cost_per_coeff) { + return totalCost(output_size, cost_per_coeff) / kTaskSize; + } + + private: + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost( + double output_size, const TensorOpCost& cost_per_coeff) { + // Cost of memory fetches from L2 cache. 64 is typical cache line size. + // 11 is L2 cache latency on Haswell. + // We don't know whether data is in L1, L2 or L3. But we are most interested + // in single-threaded computational time around 100us-10ms (smaller time + // is too small for parallelization, larger time is not interesting + // either because we are probably using all available threads already). + // And for the target time range, L2 seems to be what matters. Data set + // fitting into L1 is too small to take noticeable time. Data set fitting + // only into L3 presumably will take more than 10ms to load and process. 
+ const double kLoadCycles = 1.0 / 64 * 11; + const double kStoreCycles = 1.0 / 64 * 11; + // Scaling from Eigen compute cost to device cycles. 
+ return output_size * + cost_per_coeff.total_cost(kLoadCycles, kStoreCycles, + kDeviceCyclesPerComputeCycle); + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h new file mode 100644 index 000000000..e020d076f --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H +#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H + +namespace Eigen { + +/** \class TensorCustomUnaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. 
+ * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename XprType::Scalar Scalar; + typedef typename XprType::StorageKind StorageKind; + typedef typename XprType::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomUnaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) + : m_expr(expr), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomUnaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_expr; } + + protected: + typename XprType::Nested m_expr; + const CustomUnaryFunc m_func; /* held by value: the functor passed to the constructor is copied into the expression */ +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomUnaryOp ArgType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = 
(internal::packet_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.expression()); /* the output shape is supplied by the user functor */ + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (data) { + evalTo(data); /* evaluate straight into the caller's buffer; m_result stays NULL on this path */ + return false; + } else { + m_result = static_cast( + m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); /* no destination given: allocate a buffer from the device, released again in cleanup() */ + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; /* m_result is only non-NULL after evalSubExprsIfNeeded() was called with a NULL destination */ + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } + + protected: + EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { + TensorMap > result( + data, m_dimensions); + m_op.func().eval(m_op.expression(), result, m_device); + } + + Dimensions m_dimensions; + const ArgType m_op; + const Device& m_device; + CoeffReturnType* m_result; +}; + + + +/** \class TensorCustomBinaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. 
+ * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename internal::promote_storage_type::ret Scalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomBinaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::traits::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) + + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomBinaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const CustomBinaryFunc m_func; /* held by value: the functor passed to the constructor is copied into the expression */ +}; + + +// Eval as rvalue +template 
+struct TensorEvaluator, Device> +{ + typedef TensorCustomBinaryOp XprType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = (internal::packet_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); /* the output shape is supplied by the user functor */ + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (data) { + evalTo(data); /* evaluate straight into the caller's buffer; m_result stays NULL on this path */ + return false; + } else { + m_result = static_cast(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); /* no destination given: allocate a buffer from the device, released again in cleanup() */ + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; /* m_result is only non-NULL after evalSubExprsIfNeeded() was called with a NULL destination */ + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. 
+ return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } + + protected: + EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { + TensorMap > result(data, m_dimensions); + m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); + } + + Dimensions m_dimensions; + const XprType m_op; + const Device& m_device; + CoeffReturnType* m_result; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h new file mode 100644 index 000000000..29e50a3b2 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -0,0 +1,68 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H + +namespace Eigen { + +/** \class TensorDevice + * \ingroup CXX11_Tensor_Module + * + * \brief Pseudo expression providing an operator = that will evaluate its argument + * on the specified computing 'device' (GPU, thread pool, ...) + * + * Example: + * C.device(EIGEN_GPU) = A + B; + * + * Todo: operator *= and /=. 
+ */ + +template class TensorDevice { + public: + TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} + + template + EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { + typedef TensorAssignOp Assign; + Assign assign(m_expression, other); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + template + EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; + Sum sum(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, sum); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + protected: + const DeviceType& m_device; + ExpressionType& m_expression; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h new file mode 100644 index 000000000..ec732f17d --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -0,0 +1,341 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H + +namespace Eigen { + +static const int kCudaScratchSize = 1024; + +// This defines an interface that GPUDevice can take to use +// CUDA streams underneath. +class StreamInterface { + public: + virtual ~StreamInterface() {} + + virtual const cudaStream_t& stream() const = 0; + virtual const cudaDeviceProp& deviceProperties() const = 0; + + // Allocate memory on the actual device where the computation will run + virtual void* allocate(size_t num_bytes) const = 0; + virtual void deallocate(void* buffer) const = 0; + + // Return a scratchpad buffer of size 1k + virtual void* scratchpad() const = 0; + + // Return a semaphore. The semaphore is initially initialized to 0, and + // each kernel using it is responsible for resetting to 0 upon completion + // to maintain the invariant that the semaphore is always equal to 0 upon + // each kernel start. + virtual unsigned int* semaphore() const = 0; +}; + +static cudaDeviceProp* m_deviceProperties; +static bool m_devicePropInitialized = false; + +static void initializeDeviceProp() { + if (!m_devicePropInitialized) { + // Attempts to ensure proper behavior in the case of multiple threads + // calling this function simultaneously. This would be trivial to + // implement if we could use std::mutex, but unfortunately mutex don't + // compile with nvcc, so we resort to atomics and thread fences instead. + // Note that if the caller uses a compiler that doesn't support c++11 we + // can't ensure that the initialization is thread safe. +#if __cplusplus >= 201103L + static std::atomic first(true); + if (first.exchange(false)) { +#else + static bool first = true; + if (first) { + first = false; +#endif + // We're the first thread to reach this point. 
+ int num_devices; + cudaError_t status = cudaGetDeviceCount(&num_devices); + if (status != cudaSuccess) { + std::cerr << "Failed to get the number of CUDA devices: " + << cudaGetErrorString(status) + << std::endl; + assert(status == cudaSuccess); + } + m_deviceProperties = new cudaDeviceProp[num_devices]; + for (int i = 0; i < num_devices; ++i) { + status = cudaGetDeviceProperties(&m_deviceProperties[i], i); + if (status != cudaSuccess) { + std::cerr << "Failed to initialize CUDA device #" + << i + << ": " + << cudaGetErrorString(status) + << std::endl; + assert(status == cudaSuccess); + } + } + +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_release); +#endif + m_devicePropInitialized = true; + } else { + // Wait for the other thread to initialize the properties. + while (!m_devicePropInitialized) { +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_acquire); +#endif +#if EIGEN_OS_WIN || EIGEN_OS_WIN64 + Sleep(1000); +#else + sleep(1); +#endif + } + } + } +} + +static const cudaStream_t default_stream = cudaStreamDefault; + +class CudaStreamDevice : public StreamInterface { + public: + // Use the default stream on the current device + CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { + cudaGetDevice(&device_); + initializeDeviceProp(); + } + // Use the default stream on the specified device + CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) { + initializeDeviceProp(); + } + // Use the specified stream. Note that it's the + // caller's responsibility to ensure that the stream can run on + // the specified device. If no device is specified the code + // assumes that the stream is associated to the current gpu device.
+ CudaStreamDevice(const cudaStream_t* stream, int device = -1) + : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { + if (device < 0) { + cudaGetDevice(&device_); + } else { + int num_devices; + cudaError_t err = cudaGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + assert(device < num_devices); + device_ = device; + } + initializeDeviceProp(); + } + + virtual ~CudaStreamDevice() { + if (scratch_) { + deallocate(scratch_); + } + } + + const cudaStream_t& stream() const { return *stream_; } + const cudaDeviceProp& deviceProperties() const { + return m_deviceProperties[device_]; + } + virtual void* allocate(size_t num_bytes) const { + cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + void* result; + err = cudaMalloc(&result, num_bytes); + assert(err == cudaSuccess); + assert(result != NULL); + return result; + } + virtual void deallocate(void* buffer) const { + cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + assert(buffer != NULL); + err = cudaFree(buffer); + assert(err == cudaSuccess); + } + + virtual void* scratchpad() const { + if (scratch_ == NULL) { + scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int)); + } + return scratch_; + } + + virtual unsigned int* semaphore() const { + if (semaphore_ == NULL) { + char* scratch = static_cast(scratchpad()) + kCudaScratchSize; + semaphore_ = reinterpret_cast(scratch); + cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + return semaphore_; + } + + private: + const cudaStream_t* stream_; + int device_; + mutable void* scratch_; + mutable unsigned int* semaphore_; +}; + +struct GpuDevice { + // The StreamInterface is not owned: the caller is + // responsible for its initialization and eventual destruction. 
+ explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { + eigen_assert(stream); + } + explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { + eigen_assert(stream); + } + // TODO(bsteiner): This is an internal API, we should not expose it. + EIGEN_STRONG_INLINE const cudaStream_t& stream() const { + return stream_->stream(); + } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return stream_->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + stream_->deallocate(buffer); + } + + EIGEN_STRONG_INLINE void* scratchpad() const { + return stream_->scratchpad(); + } + + EIGEN_STRONG_INLINE unsigned int* semaphore() const { + return stream_->semaphore(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { +#ifndef __CUDA_ARCH__ + cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, + stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); +#else + eigen_assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + cudaError_t err = + cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + cudaError_t err = + cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { +#ifndef __CUDA_ARCH__ + cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); +#else + eigen_assert(false && "The default 
device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE size_t numThreads() const { + // FIXME + return 32; + } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + // FIXME + return 48*1024; + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // We won't try to take advantage of the l2 cache for the time being, and + // there is no l3 cache on cuda devices. + return firstLevelCacheSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { +#if defined(__CUDACC__) && !defined(__CUDA_ARCH__) + cudaError_t err = cudaStreamSynchronize(stream_->stream()); + if (err != cudaSuccess) { + std::cerr << "Error detected in CUDA stream: " + << cudaGetErrorString(err) + << std::endl; + assert(err == cudaSuccess); + } +#else + assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { + return stream_->deviceProperties().multiProcessorCount; + } + EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { + return stream_->deviceProperties().maxThreadsPerBlock; + } + EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { + return stream_->deviceProperties().maxThreadsPerMultiProcessor; + } + EIGEN_STRONG_INLINE int sharedMemPerBlock() const { + return stream_->deviceProperties().sharedMemPerBlock; + } + EIGEN_STRONG_INLINE int majorDeviceVersion() const { + return stream_->deviceProperties().major; + } + EIGEN_STRONG_INLINE int minorDeviceVersion() const { + return stream_->deviceProperties().minor; + } + + EIGEN_STRONG_INLINE int maxBlocks() const { + return max_blocks_; + } + + // This function checks if the CUDA runtime recorded an error for the + // underlying stream device. 
+ inline bool ok() const { +#ifdef __CUDACC__ + cudaError_t error = cudaStreamQuery(stream_->stream()); + return (error == cudaSuccess) || (error == cudaErrorNotReady); +#else + return false; +#endif + } + + private: + const StreamInterface* stream_; + int max_blocks_; +}; + +#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ + (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ + assert(cudaGetLastError() == cudaSuccess); + + +// FIXME: Should be device and kernel specific. +#ifdef __CUDACC__ +static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { +#ifndef __CUDA_ARCH__ + cudaError_t status = cudaDeviceSetSharedMemConfig(config); + EIGEN_UNUSED_VARIABLE(status) + assert(status == cudaSuccess); +#else + EIGEN_UNUSED_VARIABLE(config) +#endif +} +#endif + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h new file mode 100644 index 000000000..9d141395b --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -0,0 +1,81 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H + + +namespace Eigen { + +// Default device for the machine (typically a single cpu core) +struct DefaultDevice { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return 1; +#else + // Running on a CUDA device + return 32; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return l1CacheSize(); +#else + // Running on a CUDA device, return the amount of shared memory available. 
+ return 48*1024; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + return l3CacheSize(); +#else + // Running on a CUDA device + return firstLevelCacheSize(); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#else + // Running on a CUDA device + return __CUDA_ARCH__ / 100; +#endif + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h new file mode 100644 index 000000000..05459f1d2 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -0,0 +1,281 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// Copyright (C) 2016 Benoit Steiner + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H + +namespace Eigen { + +#define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast::pointer_t>((&(*buf_acc.get_pointer()))) + +struct QueueInterface { + /// class members: + bool exception_caught_ = false; + + mutable std::mutex mutex_; + + /// std::map is the container used to make sure that we create only one buffer + /// per pointer. The lifespan of the buffer now depends on the lifespan of SyclDevice. 
+ /// If a non-read-only pointer is needed to be accessed on the host we should manually deallocate it. + mutable std::map> buffer_map; + /// sycl queue + mutable cl::sycl::queue m_queue; + /// creating device by using selector + /// SyclStreamDevice is not owned. it is the caller's responsibility to destroy it. + template explicit QueueInterface(dev_Selector s): +#ifdef EIGEN_EXCEPTIONS + m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + if (e) { + exception_caught_ = true; + std::rethrow_exception(e); + } + } catch (cl::sycl::exception e) { + std::cerr << e.what() << std::endl; + } + } + })) +#else + m_queue(cl::sycl::queue(s)) +#endif + {} + + /// creating device by using selector + /// SyclStreamDevice is not owned. it is the caller's responsibility to destroy it. + explicit QueueInterface(cl::sycl::device d): +#ifdef EIGEN_EXCEPTIONS + m_queue(cl::sycl::queue(d, [&](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + if (e) { + exception_caught_ = true; + std::rethrow_exception(e); + } + } catch (cl::sycl::exception e) { + std::cerr << e.what() << std::endl; + } + } + })) +#else + m_queue(cl::sycl::queue(d)) +#endif + {} + + + /// Allocating device pointer. This pointer is actually an 8 bytes host pointer used as key to access the sycl device buffer. + /// The reason is that we cannot use device buffer as a pointer as a m_data in Eigen leafNode expressions. So we create a key + /// pointer to be used in Eigen expression construction. When we convert the Eigen construction into the sycl construction we + /// use this pointer as a key in our buffer_map and we make sure that we dedicate only one buffer only for this pointer. + /// The device pointer would be deleted by calling deallocate function. 
+ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + auto buf = cl::sycl::buffer(cl::sycl::range<1>(num_bytes)); + auto ptr =buf.get_access().get_pointer(); + buf.set_final_data(nullptr); + std::lock_guard lock(mutex_); + buffer_map.insert(std::pair>(ptr,buf)); + return static_cast(ptr); + } + + /// This is used to deallocate the device pointer. p is used as a key inside + /// the map to find the device buffer and delete it. + EIGEN_STRONG_INLINE void deallocate(const void *p) const { + std::lock_guard lock(mutex_); + auto it = buffer_map.find(static_cast(p)); + if (it != buffer_map.end()) { + buffer_map.erase(it); + } + } + + EIGEN_STRONG_INLINE void deallocate_all() const { + std::lock_guard lock(mutex_); + buffer_map.clear(); + } + + EIGEN_STRONG_INLINE std::map>::iterator find_buffer(const void* ptr) const { + std::lock_guard lock(mutex_); + auto it1 = buffer_map.find(static_cast(ptr)); + if (it1 != buffer_map.end()){ + return it1; + } + else{ + for(std::map>::iterator it=buffer_map.begin(); it!=buffer_map.end(); ++it){ + auto size = it->second.get_size(); + if((it->first < (static_cast(ptr))) && ((static_cast(ptr)) < (it->first + size)) ) return it; + } + } + std::cerr << "No sycl buffer found. Make sure that you have allocated memory for your buffer by calling allocate function in SyclDevice"<< std::endl; + abort(); + } + + // This function checks if the runtime recorded an error for the + // underlying stream device. 
+ EIGEN_STRONG_INLINE bool ok() const { + if (!exception_caught_) { + m_queue.throw_asynchronous(); + } + return !exception_caught_; + } + // destructor + ~QueueInterface() { buffer_map.clear(); } +}; + +template class MemCopyFunctor { + public: + typedef cl::sycl::accessor read_accessor; + typedef cl::sycl::accessor write_accessor; + MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {} + void operator()(cl::sycl::nd_item<1> itemID) { + auto src_ptr = ConvertToActualTypeSycl(T, m_src_acc); + auto dst_ptr = ConvertToActualTypeSycl(T, m_dst_acc); + auto globalid = itemID.get_global_linear_id(); + if (globalid < m_rng) { + dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset]; + } + } + private: + read_accessor m_src_acc; + write_accessor m_dst_acc; + size_t m_rng; + size_t m_i; + size_t m_offset; +}; + +struct SyclDevice { + // class member. + QueueInterface* m_queue_stream; + /// QueueInterface is not owned. it is the caller's responsibility to destroy it. + explicit SyclDevice(QueueInterface* queue_stream) : m_queue_stream(queue_stream){} + + /// Creation of sycl accessor for a buffer. This function first tries to find + /// the buffer in the buffer_map. If found it gets the accessor from it, if not, + /// the function then adds an entry by creating a sycl buffer for that particular pointer. 
+ template EIGEN_STRONG_INLINE cl::sycl::accessor + get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const void* ptr) const { + return (get_sycl_buffer(num_bytes, ptr).template get_access(cgh)); + } + + /// Accessing the created sycl device buffer for the device pointer + EIGEN_STRONG_INLINE cl::sycl::buffer& get_sycl_buffer(size_t , const void * ptr) const { + return m_queue_stream->find_buffer(ptr)->second; + } + + /// This is used to prepare the number of threads and also the number of threads per block for sycl kernels + EIGEN_STRONG_INLINE void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange) const { + tileSize =sycl_queue().get_device(). template get_info()/2; + rng = n; + if (rng==0) rng=1; + GRange=rng; + if (tileSize>GRange) tileSize=GRange; + else if(GRange>tileSize){ + size_t xMode = GRange % tileSize; + if (xMode != 0) GRange += (tileSize - xMode); + } + } + /// allocate device memory + EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { + return m_queue_stream->allocate(num_bytes); + } + /// deallocate device memory + EIGEN_STRONG_INLINE void deallocate(const void *p) const { + m_queue_stream->deallocate(p); + } + + // some runtime conditions that can be applied here + EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; } + + + /// the memcpy function + template EIGEN_STRONG_INLINE void memcpy(void *dst, const T *src, size_t n) const { + auto it1 = m_queue_stream->find_buffer((void*)src); + auto it2 = m_queue_stream->find_buffer(dst); + auto offset= (static_cast(static_cast(src))) - it1->first; + auto i= (static_cast(dst)) - it2->first; + offset/=sizeof(T); + i/=sizeof(T); + size_t rng, GRange, tileSize; + parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); + sycl_queue().submit([&](cl::sycl::handler &cgh) { + auto src_acc =it1->second.template get_access(cgh); + auto dst_acc =it2->second.template get_access(cgh); + cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), 
cl::sycl::range<1>(tileSize)), MemCopyFunctor(src_acc, dst_acc, rng, 0, offset)); + }); + sycl_queue().throw_asynchronous(); + } + + /// The memcpyHostToDevice is used to copy data from a host pointer to a device-only pointer. Using the device + /// pointer created as a key we find the sycl buffer and get the host accessor with discard_write mode + /// on it. Using a discard_write accessor guarantees that we do not bring back the current value of the + /// buffer to host. Then we use the memcpy to copy the data to the host accessor. The first time that + /// this buffer is accessed, the data will be copied to the device. + template EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { + auto host_acc= get_sycl_buffer(n, dst). template get_access(); + ::memcpy(host_acc.get_pointer(), src, n); + } + /// The memcpyDeviceToHost is used to copy the data from device to host. Here, in order to avoid double copying the data, we create a sycl + /// buffer with map_allocator for the destination pointer with a discard_write accessor on it. The lifespan of the buffer is bound to the + /// lifespan of the memcpyDeviceToHost function. We create a kernel to copy the data, from the device-only source buffer to the destination + /// buffer with map_allocator on the gpu in parallel. At the end of the function call the destination buffer would be destroyed and the data + /// would be available on the dst pointer using fast copy technique (map_allocator). In this case we can make sure that we copy the data back + /// to the cpu only once per function call.
+ template EIGEN_STRONG_INLINE void memcpyDeviceToHost(void *dst, const T *src, size_t n) const { + auto it = m_queue_stream->find_buffer(src); + auto offset =static_cast(static_cast(src))- it->first; + offset/=sizeof(T); + size_t rng, GRange, tileSize; + parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); + // Assuming that the dst is the start of the destination pointer + auto dest_buf = cl::sycl::buffer >(static_cast(dst), cl::sycl::range<1>(rng*sizeof(T))); + sycl_queue().submit([&](cl::sycl::handler &cgh) { + auto src_acc= it->second.template get_access(cgh); + auto dst_acc =dest_buf.template get_access(cgh); + cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor(src_acc, dst_acc, rng, 0, offset)); + }); + sycl_queue().throw_asynchronous(); + } + /// returning the sycl queue + EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;} + /// Here is the implementation of memset function on sycl. + template EIGEN_STRONG_INLINE void memset(T *buff, int c, size_t n) const { + size_t rng, GRange, tileSize; + parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); + sycl_queue().submit([&](cl::sycl::handler &cgh) { + auto buf_acc =get_sycl_buffer(n, static_cast(static_cast(buff))). 
template get_access(cgh); + cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { + auto globalid=itemID.get_global_linear_id(); + if (globalid< buf_acc.get_size()) { + for(size_t i=0; iok(); + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h new file mode 100644 index 000000000..210ae1368 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -0,0 +1,279 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H + +namespace Eigen { + +// Use the SimpleThreadPool by default. We'll switch to the new non blocking +// thread pool later. +#ifndef EIGEN_USE_SIMPLE_THREAD_POOL +template using ThreadPoolTempl = NonBlockingThreadPoolTempl; +typedef NonBlockingThreadPool ThreadPool; +#else +template using ThreadPoolTempl = SimpleThreadPoolTempl; +typedef SimpleThreadPool ThreadPool; +#endif + + +// Barrier is an object that allows one or more threads to wait until +// Notify has been called a specified number of times. 
+class Barrier { + public: + Barrier(unsigned int count) : state_(count << 1), notified_(false) { + eigen_assert(((count << 1) >> 1) == count); + } + ~Barrier() { + eigen_assert((state_>>1) == 0); + } + + void Notify() { + unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; + if (v != 1) { + eigen_assert(((v + 2) & ~1) != 0); + return; // either count has not dropped to 0, or waiter is not waiting + } + std::unique_lock l(mu_); + eigen_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void Wait() { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return; + std::unique_lock l(mu_); + while (!notified_) { + cv_.wait(l); + } + } + + private: + std::mutex mu_; + std::condition_variable cv_; + std::atomic state_; // low bit is waiter flag + bool notified_; +}; + + +// Notification is an object that allows a user to to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object, +// but only one caller must call Notify() on the object. +struct Notification : Barrier { + Notification() : Barrier(1) {}; +}; + + +// Runs an arbitrary function and then calls Notify() on the passed in +// Notification. +template struct FunctionWrapperWithNotification +{ + static void run(Notification* n, Function f, Args... args) { + f(args...); + if (n) { + n->Notify(); + } + } +}; + +template struct FunctionWrapperWithBarrier +{ + static void run(Barrier* b, Function f, Args... args) { + f(args...); + if (b) { + b->Notify(); + } + } +}; + +template +static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { + if (n) { + n->Wait(); + } +} + + +// Build a thread pool device on top the an existing pool of threads. +struct ThreadPoolDevice { + // The ownership of the thread pool remains with the caller. 
+ ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_STRONG_INLINE int numThreads() const { + return num_threads_; + } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + return l1CacheSize(); + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // The l3 cache size is shared between all the cores. + return l3CacheSize() / num_threads_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + + template + EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { + Notification* n = new Notification(); + pool_->Schedule(std::bind(&FunctionWrapperWithNotification::run, n, f, args...)); + return n; + } + + template + EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, + Function&& f, + Args&&... args) const { + pool_->Schedule(std::bind( + &FunctionWrapperWithBarrier::run, b, f, args...)); + } + + template + EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { + pool_->Schedule(std::bind(f, args...)); + } + + // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if + // called from one of the threads in pool_. Returns -1 otherwise. 
+ EIGEN_STRONG_INLINE int currentThreadId() const { + return pool_->CurrentThreadId(); + } + + // parallelFor executes f with [0, n) arguments in parallel and waits for + // completion. F accepts a half-open interval [first, last). + // Block size is choosen based on the iteration cost and resulting parallel + // efficiency. If block_align is not nullptr, it is called to round up the + // block size. + void parallelFor(Index n, const TensorOpCost& cost, + std::function block_align, + std::function f) const { + typedef TensorCostModel CostModel; + if (n <= 1 || numThreads() == 1 || + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { + f(0, n); + return; + } + + // Calculate block size based on (1) the iteration cost and (2) parallel + // efficiency. We want blocks to be not too small to mitigate + // parallelization overheads; not too large to mitigate tail + // effect and potential load imbalance and we also want number + // of blocks to be evenly dividable across threads. + + double block_size_f = 1.0 / CostModel::taskSize(1, cost); + Index block_size = numext::mini(n, numext::maxi(1, block_size_f)); + const Index max_block_size = + numext::mini(n, numext::maxi(1, 2 * block_size_f)); + if (block_align) { + Index new_block_size = block_align(block_size); + eigen_assert(new_block_size >= block_size); + block_size = numext::mini(n, new_block_size); + } + Index block_count = divup(n, block_size); + // Calculate parallel efficiency as fraction of total CPU time used for + // computations: + double max_efficiency = + static_cast(block_count) / + (divup(block_count, numThreads()) * numThreads()); + // Now try to increase block size up to max_block_size as long as it + // doesn't decrease parallel efficiency. + for (Index prev_block_count = block_count; prev_block_count > 1;) { + // This is the next block size that divides size into a smaller number + // of blocks than the current block_size. 
+ Index coarser_block_size = divup(n, prev_block_count - 1); + if (block_align) { + Index new_block_size = block_align(coarser_block_size); + eigen_assert(new_block_size >= coarser_block_size); + coarser_block_size = numext::mini(n, new_block_size); + } + if (coarser_block_size > max_block_size) { + break; // Reached max block size. Stop. + } + // Recalculate parallel efficiency. + const Index coarser_block_count = divup(n, coarser_block_size); + eigen_assert(coarser_block_count < prev_block_count); + prev_block_count = coarser_block_count; + const double coarser_efficiency = + static_cast(coarser_block_count) / + (divup(coarser_block_count, numThreads()) * numThreads()); + if (coarser_efficiency + 0.01 >= max_efficiency) { + // Taking it. + block_size = coarser_block_size; + block_count = coarser_block_count; + if (max_efficiency < coarser_efficiency) { + max_efficiency = coarser_efficiency; + } + } + } + + // Recursively divide size into halves until we reach block_size. + // Division code rounds mid to block_size, so we are guaranteed to get + // block_count leaves that do actual computations. + Barrier barrier(static_cast(block_count)); + std::function handleRange; + handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { + if (last - first <= block_size) { + // Single block or less, execute directly. + f(first, last); + barrier.Notify(); + return; + } + // Split into halves and submit to the pool. + Index mid = first + divup((last - first) / 2, block_size) * block_size; + pool_->Schedule([=, &handleRange]() { handleRange(mid, last); }); + handleRange(first, mid); + }; + handleRange(0, n); + barrier.Wait(); + } + + // Convenience wrapper for parallelFor that does not align blocks. 
+ void parallelFor(Index n, const TensorOpCost& cost, + std::function f) const { + parallelFor(n, cost, nullptr, std::move(f)); + } + + private: + ThreadPoolInterface* pool_; + int num_threads_; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h new file mode 100644 index 000000000..1a30e45fb --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H + +namespace Eigen { + +/** \internal + * + * \class TensorDimensionList + * \ingroup CXX11_Tensor_Module + * + * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. 
+ * + * \sa Tensor + */ + +template struct DimensionList { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const Index operator[] (const Index i) const { return i; } +}; + +namespace internal { + +template struct array_size > { + static const size_t value = Rank; +}; +template struct array_size > { + static const size_t value = Rank; +}; + +template const Index array_get(DimensionList&) { + return n; +} +template const Index array_get(const DimensionList&) { + return n; +} + + +#if EIGEN_HAS_CONSTEXPR +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return true; + } +}; +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return true; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; + +template +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; +template +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; + +template +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i == value; + } +}; +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i == value; + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i != value; + } +}; +template +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i != value; + } +}; + +template +struct index_statically_gt_impl 
> { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i > value; + } +}; +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i > value; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i < value; + } +}; +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i < value; + } +}; + +#else +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return true; + } +}; +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return true; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; + +template +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; +template +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; + +template +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ + return false; 
+ } +}; +template +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +#endif + +} // end namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h new file mode 100644 index 000000000..b24cdebf1 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -0,0 +1,428 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H + + +namespace Eigen { + +/** \internal + * + * \class TensorDimensions + * \ingroup CXX11_Tensor_Module + * + * \brief Set of classes used to encode and store the dimensions of a Tensor. 
+ * + * The Sizes class encodes as part of the type the number of dimensions and the + * sizes corresponding to each dimension. It uses no storage space since it is + * entirely known at compile time. + * The DSizes class is its dynamic sibling: the number of dimensions is known + * at compile time but the sizes are set during execution. + * + * \sa Tensor + */ + +// Boilerplate code +namespace internal { + +template struct dget { + static const std::size_t value = get::value; +}; + + +template +struct fixed_size_tensor_index_linearization_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(array const& indices, + const Dimensions& dimensions) + { + return array_get(indices) + + dget::value * + fixed_size_tensor_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct fixed_size_tensor_index_linearization_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(array const&, const Dimensions&) + { + return 0; + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(const Index index, + const Dimensions& dimensions) + { + const Index mult = (index == n-1) ? 
1 : 0; + return array_get(dimensions) * mult + + fixed_size_tensor_index_extraction_helper::run(index, dimensions); + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(const Index, + const Dimensions&) + { + return 0; + } + }; + +} // end namespace internal + + +// Fixed size +#ifndef EIGEN_EMULATE_CXX11_META_H +template +struct Sizes : internal::numeric_list { + typedef internal::numeric_list Base; + static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { + return Base::count; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t TotalSize() { + return internal::arg_prod(Indices...); + } + + EIGEN_DEVICE_FUNC Sizes() { } + template + explicit EIGEN_DEVICE_FUNC Sizes(const array& /*indices*/) { + // todo: add assertion + } +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { } + explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list /*l*/) { + // todo: add assertion + } +#endif + + template Sizes& operator = (const T& /*other*/) { + // add assertion failure if the size of other is different + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { + return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfColMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfRowMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes&) { + return Sizes::total_size; +} +} + 
+#else + +template +struct non_zero_size { + typedef internal::type2val type; +}; +template <> +struct non_zero_size<0> { + typedef internal::null_type type; +}; + +template struct Sizes { + typedef typename internal::make_type_list::type, typename non_zero_size::type, typename non_zero_size::type, typename non_zero_size::type, typename non_zero_size::type >::type Base; + static const size_t count = Base::count; + static const std::size_t total_size = internal::arg_prod::value; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + return count; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { + return internal::arg_prod::value; + } + + Sizes() { } + template + explicit Sizes(const array& /*indices*/) { + // todo: add assertion + } + template Sizes& operator = (const T& /*other*/) { + // add assertion failure if the size of other is different + return *this; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template Sizes(DenseIndex... /*indices*/) { } + explicit Sizes(std::initializer_list) { + // todo: add assertion + } +#else + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex operator[] (const int index) const { + switch (index) { + case 0: + return internal::get<0, Base>::value; + case 1: + return internal::get<1, Base>::value; + case 2: + return internal::get<2, Base>::value; + case 3: + return internal::get<3, Base>::value; + case 4: + return internal::get<4, Base>::value; + default: + eigen_assert(false && "index overflow"); + return static_cast(-1); + } + } + + template EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE + size_t IndexOfColMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfRowMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes&) { + return Sizes::total_size; +} +} + +#endif + +// Boilerplate +namespace internal { +template +struct tensor_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, array const& dimensions) + { + return array_get(indices) + + array_get(dimensions) * + tensor_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct tensor_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, array const&) + { + return array_get(indices); + } +}; +} // end namespace internal + + + +// Dynamic size +template +struct DSizes : array { + typedef array Base; + static const int count = NumDims; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + return NumDims; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { + return (NumDims == 0) ? 1 : internal::array_prod(*static_cast(this)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { + for (int i = 0 ; i < NumDims; ++i) { + (*this)[i] = 0; + } + } + EIGEN_DEVICE_FUNC explicit DSizes(const array& a) : Base(a) { } + + EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) { + eigen_assert(NumDims == 1); + (*this)[0] = i0; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... 
otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) { + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) { + eigen_assert(NumDims == 2); + (*this)[0] = i0; + (*this)[1] = i1; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + eigen_assert(NumDims == 3); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + eigen_assert(NumDims == 4); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + eigen_assert(NumDims == 5); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + (*this)[4] = i4; + } +#endif + + EIGEN_DEVICE_FUNC DSizes& operator = (const array& other) { + *static_cast(this) = other; + return *this; + } + + // A constexpr would be so much better here + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfColMajor(const array& indices) const { + return internal::tensor_index_linearization_helper::run(indices, *static_cast(this)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfRowMajor(const array& indices) const { + return internal::tensor_index_linearization_helper::run(indices, *static_cast(this)); + } +}; + + + + +// Boilerplate +namespace internal { +template +struct tensor_vsize_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, std::vector const& dimensions) + { + return array_get(indices) + + array_get(dimensions) * + tensor_vsize_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct tensor_vsize_index_linearization_helper +{ + static 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, std::vector const&) + { + return array_get(indices); + } +}; +} // end namespace internal + + +namespace internal { + +template struct array_size > { + static const size_t value = NumDims; +}; +template struct array_size > { + static const size_t value = NumDims; +}; +#ifndef EIGEN_EMULATE_CXX11_META_H +template struct array_size > { +static const std::ptrdiff_t value = Sizes::count; +}; +template struct array_size > { +static const std::ptrdiff_t value = Sizes::count; +}; +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes&) { + return get >::value; +} +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { + eigen_assert(false && "should never be called"); + return -1; +} +#else +template struct array_size > { + static const size_t value = Sizes::count; +}; +template struct array_size > { + static const size_t value = Sizes::count; +}; +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes&) { + return get::Base>::value; +} + +#endif + + +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { + return false; + } +}; +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) { + return (array_get(dims1) == array_get(dims2)) & + sizes_match_below_dim::run(dims1, dims2); + } +}; +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { + return true; + } +}; + +} // end namespace internal + + +template +EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) { + return internal::sizes_match_below_dim::value, internal::array_size::value>::run(dims1, dims2); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h 
b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h new file mode 100644 index 000000000..06987132b --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -0,0 +1,181 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H +#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H + +namespace Eigen { + +/** \class TensorForcedEval + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +namespace internal { +template class MakePointer_> +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; + template + struct MakePointer { + // Intermediate typedef to workaround MSVC issue. 
+ typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorEvalToOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorEvalToOp type; +}; + +} // end namespace internal + + + + +template class MakePointer_> +class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename MakePointer_::Type PointerType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) + : m_xpr(expr), m_buffer(buffer) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } + + protected: + typename XprType::Nested m_xpr; + PointerType m_buffer; +}; + + + +template class MakePointer_> +struct TensorEvaluator, Device> +{ + typedef TensorEvalToOp XprType; + typedef typename ArgType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename XprType::Index Index; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = true + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : 
m_impl(op.expression(), device), m_device(device), + m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) + { } + + // Used for accessor extraction in SYCL Managed TensorMap: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { + return m_op; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { + } + + typedef typename internal::traits >::template MakePointer::Type DevicePointer; + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { + EIGEN_UNUSED_VARIABLE(scalar); + eigen_assert(scalar == NULL); + return m_impl.evalSubExprsIfNeeded(m_buffer); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { + m_buffer[i] = m_impl.coeff(i); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { + internal::pstoret(m_buffer + i, m_impl.template packet::IsAligned ? Aligned : Unaligned>(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_buffer[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return internal::ploadt(m_buffer + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // We assume that evalPacket or evalScalar is called to perform the + // assignment and account for the cost of the write here. 
+ return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } + ArgType expression() const { return m_expression; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + + private: + TensorEvaluator m_impl; + const Device& m_device; + DevicePointer m_buffer; + const XprType& m_op; + const ArgType m_expression; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h new file mode 100644 index 000000000..834ce07df --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -0,0 +1,633 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H + +namespace Eigen { + +/** \class TensorEvaluator + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor evaluator classes. + * + * These classes are responsible for the evaluation of the tensor expression. + * + * TODO: add support for more types of expressions, in particular expressions + * leading to lvalues (slicing, reshaping, etc...) 
+ */ + +// Generic evaluator +template +struct TensorEvaluator +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + + // NumDimensions is -1 for variable dim tensors + static const int NumCoords = internal::traits::NumDimensions > 0 ? + internal::traits::NumDimensions : 0; + + enum { + IsAligned = Derived::IsAligned, + PacketAccess = (internal::unpacket_traits::size > 1), + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) + : m_data(const_cast::template MakePointer::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) + { } + + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { + if (dest) { + m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); + return false; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_data); + return m_data[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + eigen_assert(m_data); + return m_data[index]; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + return internal::ploadt(m_data + index); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + return internal::pstoret(m_data + index, x); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType 
coeff(const array& coords) const { + eigen_assert(m_data); + if (static_cast(Layout) == static_cast(ColMajor)) { + return m_data[m_dims.IndexOfColMajor(coords)]; + } else { + return m_data[m_dims.IndexOfRowMajor(coords)]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array& coords) { + eigen_assert(m_data); + if (static_cast(Layout) == static_cast(ColMajor)) { + return m_data[m_dims.IndexOfColMajor(coords)]; + } else { + return m_data[m_dims.IndexOfRowMajor(coords)]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + internal::unpacket_traits::size); + } + + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// required by sycl in order to construct sycl buffer from raw pointer + const Device& device() const{return m_device;} + + protected: + typename internal::traits::template MakePointer::Type m_data; + Dimensions m_dims; + const Device& m_device; + const Derived& m_impl; +}; + +namespace { +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T loadConstant(const T* address) { + return *address; +} +// Use the texture cache on CUDA devices whenever possible +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float loadConstant(const float* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double loadConstant(const double* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +Eigen::half loadConstant(const Eigen::half* address) { + return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); +} +#endif +} + + +// Default evaluator for rvalues +template +struct TensorEvaluator +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename 
PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + + // NumDimensions is -1 for variable dim tensors + static const int NumCoords = internal::traits::NumDimensions > 0 ? + internal::traits::NumDimensions : 0; + + enum { + IsAligned = Derived::IsAligned, + PacketAccess = (internal::unpacket_traits::size > 1), + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true + }; + + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) + : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (!NumTraits::type>::RequireInitialization && data) { + m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); + return false; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_data); + return loadConstant(m_data+index); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + return internal::ploadt_ro(m_data + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { + eigen_assert(m_data); + const Index index = (static_cast(Layout) == static_cast(ColMajor)) ? 
m_dims.IndexOfColMajor(coords) + : m_dims.IndexOfRowMajor(coords); + return loadConstant(m_data+index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + internal::unpacket_traits::size); + } + + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + + protected: + typename internal::traits::template MakePointer::Type m_data; + Dimensions m_dims; + const Device& m_device; + const Derived& m_impl; +}; + + + + +// -------------------- CwiseNullaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseNullaryOp XprType; + + enum { + IsAligned = true, + PacketAccess = internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC + TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_wrapper(m_functor, index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) 
const + { + return m_wrapper.template packetOp(m_functor, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + internal::unpacket_traits::size); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_argImpl; } + /// required by sycl in order to extract the accessor + NullaryOp functor() const { return m_functor; } + + + private: + const NullaryOp m_functor; + TensorEvaluator m_argImpl; + const internal::nullary_wrapper m_wrapper; +}; + + + +// -------------------- CwiseUnaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseUnaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression(), device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_argImpl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_argImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + 
template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_argImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator & impl() const { return m_argImpl; } + /// added for sycl in order to construct the buffer from sycl device + UnaryOp functor() const { return m_functor; } + + + private: + const UnaryOp m_functor; + TensorEvaluator m_argImpl; +}; + + +// -------------------- CwiseBinaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseBinaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + 
typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use right impl instead if right impl dimensions are known at compile time. + return m_leftImpl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_leftImpl.template packet(index), m_rightImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_leftImpl.costPerCoeff(vectorized) + + m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + /// required by sycl in order to extract the accessor + BinaryOp functor() const { return m_functor; } + + private: + const BinaryOp m_functor; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; +}; + +// -------------------- CwiseTernaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseTernaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess 
= TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_arg1Impl(op.arg1Expression(), device), + m_arg2Impl(op.arg2Expression(), device), + m_arg3Impl(op.arg3Expression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use arg2 or arg3 dimensions if they are known at compile time. 
+ return m_arg1Impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_arg1Impl.evalSubExprsIfNeeded(NULL); + m_arg2Impl.evalSubExprsIfNeeded(NULL); + m_arg3Impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_arg1Impl.cleanup(); + m_arg2Impl.cleanup(); + m_arg3Impl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_arg1Impl.costPerCoeff(vectorized) + + m_arg2Impl.costPerCoeff(vectorized) + + m_arg3Impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator & arg1Impl() const { return m_arg1Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg2Impl() const { return m_arg2Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg3Impl() const { return m_arg3Impl; } + + private: + const TernaryOp m_functor; + TensorEvaluator m_arg1Impl; + TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg3Impl; +}; + + +// -------------------- SelectOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorSelectOp XprType; + typedef typename XprType::Scalar Scalar; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + 
PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::packet_traits::HasBlend, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_condImpl(op.ifExpression(), device), + m_thenImpl(op.thenExpression(), device), + m_elseImpl(op.elseExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); + eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use then or else impl instead if they happen to be known at compile time. + return m_condImpl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_condImpl.evalSubExprsIfNeeded(NULL); + m_thenImpl.evalSubExprsIfNeeded(NULL); + m_elseImpl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_condImpl.cleanup(); + m_thenImpl.cleanup(); + m_elseImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_condImpl.coeff(index) ? 
m_thenImpl.coeff(index) : m_elseImpl.coeff(index); + } + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const + { + internal::Selector select; + for (Index i = 0; i < PacketSize; ++i) { + select.select[i] = m_condImpl.coeff(index+i); + } + return internal::pblend(select, + m_thenImpl.template packet(index), + m_elseImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return m_condImpl.costPerCoeff(vectorized) + + m_thenImpl.costPerCoeff(vectorized) + .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator & cond_impl() const { return m_condImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& then_impl() const { return m_thenImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& else_impl() const { return m_elseImpl; } + + private: + TensorEvaluator m_condImpl; + TensorEvaluator m_thenImpl; + TensorEvaluator m_elseImpl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h new file mode 100644 index 000000000..f01d77c0a --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -0,0 +1,288 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H + +namespace Eigen { + +/** \class TensorExecutor + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor executor class. + * + * This class is responsible for launch the evaluation of the expression on + * the specified computing device. + */ +namespace internal { + +// Default strategy: the expression is evaluated with a single cpu thread. +template +class TensorExecutor +{ + public: + typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC + static inline void run(const Expression& expr, const Device& device = Device()) + { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const Index size = array_prod(evaluator.dimensions()); + for (Index i = 0; i < size; ++i) { + evaluator.evalScalar(i); + } + } + evaluator.cleanup(); + } +}; + + +template +class TensorExecutor +{ + public: + typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC + static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) + { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const Index size = array_prod(evaluator.dimensions()); + const int PacketSize = unpacket_traits::PacketReturnType>::size; + // Give the compiler a strong hint to unroll the loop. But don't insist + // on unrolling, because if the function is expensive the compiler should not + // unroll the loop at the expense of inlining. 
+ const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize; + for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) { + for (Index j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } + } + const Index VectorizedSize = (size / PacketSize) * PacketSize; + for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) { + evaluator.evalPacket(i); + } + for (Index i = VectorizedSize; i < size; ++i) { + evaluator.evalScalar(i); + } + } + evaluator.cleanup(); + } +}; + + + +// Multicore strategy: the index space is partitioned and each partition is executed on a single core +#ifdef EIGEN_USE_THREADS +template +struct EvalRange { + static void run(Evaluator* evaluator_in, const Index first, const Index last) { + Evaluator evaluator = *evaluator_in; + eigen_assert(last >= first); + for (Index i = first; i < last; ++i) { + evaluator.evalScalar(i); + } + } + + static Index alignBlockSize(Index size) { + return size; + } +}; + +template +struct EvalRange { + static const int PacketSize = unpacket_traits::size; + + static void run(Evaluator* evaluator_in, const Index first, const Index last) { + Evaluator evaluator = *evaluator_in; + eigen_assert(last >= first); + Index i = first; + if (last - first >= PacketSize) { + eigen_assert(first % PacketSize == 0); + Index last_chunk_offset = last - 4 * PacketSize; + // Give the compiler a strong hint to unroll the loop. But don't insist + // on unrolling, because if the function is expensive the compiler should not + // unroll the loop at the expense of inlining. 
+ for (; i <= last_chunk_offset; i += 4*PacketSize) { + for (Index j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } + } + last_chunk_offset = last - PacketSize; + for (; i <= last_chunk_offset; i += PacketSize) { + evaluator.evalPacket(i); + } + } + for (; i < last; ++i) { + evaluator.evalScalar(i); + } + } + + static Index alignBlockSize(Index size) { + // Align block size to packet size and account for unrolling in run above. + if (size >= 16 * PacketSize) { + return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1); + } + // Aligning to 4 * PacketSize would increase block size by more than 25%. + return (size + PacketSize - 1) & ~(PacketSize - 1); + } +}; + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static inline void run(const Expression& expr, const ThreadPoolDevice& device) + { + typedef TensorEvaluator Evaluator; + Evaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const Index size = array_prod(evaluator.dimensions()); +#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL) + device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), + EvalRange::alignBlockSize, + [&evaluator](Index first, Index last) { + EvalRange::run(&evaluator, first, last); + }); +#else + size_t num_threads = device.numThreads(); + if (num_threads > 1) { + num_threads = TensorCostModel::numThreads( + size, evaluator.costPerCoeff(Vectorizable), num_threads); + } + if (num_threads == 1) { + EvalRange::run(&evaluator, 0, size); + } else { + const Index PacketSize = Vectorizable ? 
unpacket_traits::size : 1; + Index blocksz = std::ceil(static_cast(size)/num_threads) + PacketSize - 1; + const Index blocksize = numext::maxi(PacketSize, (blocksz - (blocksz % PacketSize))); + const Index numblocks = size / blocksize; + + Barrier barrier(numblocks); + for (int i = 0; i < numblocks; ++i) { + device.enqueue_with_barrier( + &barrier, &EvalRange::run, + &evaluator, i * blocksize, (i + 1) * blocksize); + } + if (numblocks * blocksize < size) { + EvalRange::run( + &evaluator, numblocks * blocksize, size); + } + barrier.Wait(); + } +#endif // defined(!EIGEN_USE_SIMPLE_THREAD_POOL) + } + evaluator.cleanup(); + } +}; +#endif // EIGEN_USE_THREADS + + +// GPU: the evaluation of the expression is offloaded to a GPU. +#if defined(EIGEN_USE_GPU) + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static void run(const Expression& expr, const GpuDevice& device); +}; + + +#if defined(__CUDACC__) +template +struct EigenMetaKernelEval { + static __device__ EIGEN_ALWAYS_INLINE + void run(Evaluator& eval, Index first, Index last, Index step_size) { + for (Index i = first; i < last; i += step_size) { + eval.evalScalar(i); + } + } +}; + +template +struct EigenMetaKernelEval { + static __device__ EIGEN_ALWAYS_INLINE + void run(Evaluator& eval, Index first, Index last, Index step_size) { + const Index PacketSize = unpacket_traits::size; + const Index vectorized_size = (last / PacketSize) * PacketSize; + const Index vectorized_step_size = step_size * PacketSize; + + // Use the vector path + for (Index i = first * PacketSize; i < vectorized_size; + i += vectorized_step_size) { + eval.evalPacket(i); + } + for (Index i = vectorized_size + first; i < last; i += step_size) { + eval.evalScalar(i); + } + } +}; + +template +__global__ void +__launch_bounds__(1024) +EigenMetaKernel(Evaluator eval, Index size) { + + const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; + const Index step_size = blockDim.x * gridDim.x; + + const 
bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; + EigenMetaKernelEval::run(eval, first_index, size, step_size); +} + +/*static*/ +template +inline void TensorExecutor::run( + const Expression& expr, const GpuDevice& device) { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + const int block_size = device.maxCudaThreadsPerBlock(); + const int max_blocks = device.getNumCudaMultiProcessors() * + device.maxCudaThreadsPerMultiProcessor() / block_size; + const Index size = array_prod(evaluator.dimensions()); + // Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0. + const int num_blocks = numext::maxi(numext::mini(max_blocks, divup(size, block_size)), 1); + + LAUNCH_CUDA_KERNEL( + (EigenMetaKernel, Index>), + num_blocks, block_size, 0, device, evaluator, size); + } + evaluator.cleanup(); +} + +#endif // __CUDACC__ +#endif // EIGEN_USE_GPU + +// SYCL Executor policy +#ifdef EIGEN_USE_SYCL + +template +class TensorExecutor { +public: + static inline void run(const Expression &expr, const SyclDevice &device) { + // call TensorSYCL module + TensorSycl::run(expr, device); + } +}; + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h new file mode 100644 index 000000000..85dfc7a69 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -0,0 +1,371 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H + +namespace Eigen { + +/** \class TensorExpr + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor expression classes. + * + * The TensorCwiseNullaryOp class applies a nullary operator to an expression. + * This is typically used to generate constants. + * + * The TensorCwiseUnaryOp class represents an expression where a unary operator + * (e.g. cwiseSqrt) is applied to an expression. + * + * The TensorCwiseBinaryOp class represents an expression where a binary + * operator (e.g. addition) is applied to a lhs and a rhs expression. + * + */ +namespace internal { +template +struct traits > + : traits +{ + typedef traits XprTraits; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +} // end namespace internal + + + +template +class TensorCwiseNullaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef TensorCwiseNullaryOp Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const NullaryOp& functor() const { return m_functor; } + + protected: + typename XprType::Nested m_xpr; + const NullaryOp m_functor; +}; + + + +namespace internal { +template +struct traits > + : traits +{ + // TODO(phli):
Add InputScalar, InputPacket. Check references to + // current Scalar/Packet to see if the intent is Input or Output. + typedef typename result_of::type Scalar; + typedef traits XprTraits; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseUnaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + // TODO(phli): Add InputScalar, InputPacket. Check references to + // current Scalar/Packet to see if the intent is Input or Output. + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const UnaryOp m_functor; +}; + + +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs + // are different. + // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to + // current Scalar/Packet to see if the intent is Inputs or Output. 
+ typedef typename result_of< + BinaryOp(typename LhsXprType::Scalar, + typename RhsXprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename promote_storage_type< + typename traits::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type< + typename traits::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseBinaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to + // current Scalar/Packet to see if the intent is Inputs or Output. 
+ typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp()) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const BinaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const BinaryOp m_functor; +}; + + +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the args are different. 
+ typedef typename result_of< + TernaryOp(typename Arg1XprType::Scalar, + typename Arg2XprType::Scalar, + typename Arg3XprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename Arg1XprType::Nested Arg1Nested; + typedef typename Arg2XprType::Nested Arg2Nested; + typedef typename Arg3XprType::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseTernaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseTernaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseTernaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) + : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg1Expression() const { return m_arg1_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg2Expression() const { return m_arg2_xpr; } + + EIGEN_DEVICE_FUNC 
+ const typename internal::remove_all::type& + arg3Expression() const { return m_arg3_xpr; } + + protected: + typename Arg1XprType::Nested m_arg1_xpr; + typename Arg2XprType::Nested m_arg2_xpr; + typename Arg3XprType::Nested m_arg3_xpr; + const TernaryOp m_functor; +}; + + +namespace internal { +template +struct traits > + : traits +{ + typedef typename traits::Scalar Scalar; + typedef traits XprTraits; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename IfXprType::Nested IfNested; + typedef typename ThenXprType::Nested ThenNested; + typedef typename ElseXprType::Nested ElseNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorSelectOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorSelectOp type; +}; + +} // end namespace internal + + +template +class TensorSelectOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC + TensorSelectOp(const IfXprType& a_condition, + const ThenXprType& a_then, + const ElseXprType& a_else) + : m_condition(a_condition), m_then(a_then), m_else(a_else) + { } + + EIGEN_DEVICE_FUNC + const IfXprType& ifExpression() const { return m_condition; } + + EIGEN_DEVICE_FUNC + const ThenXprType& thenExpression() const { return m_then; } + + EIGEN_DEVICE_FUNC + const ElseXprType& elseExpression() const { return m_else; } + + protected: + typename 
IfXprType::Nested m_condition; + typename ThenXprType::Nested m_then; + typename ElseXprType::Nested m_else; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h new file mode 100644 index 000000000..08eb5595a --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -0,0 +1,651 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H +#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H + +// This code requires the ability to initialize arrays of constant +// values directly inside a class. +#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900 + +namespace Eigen { + +/** \class TensorFFT + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor FFT class. 
+ * + * TODO: + * Vectorize the Cooley Tukey and the Bluestein algorithm + * Add support for multithreaded evaluation + * Improve the performance on GPU + */ + +template struct MakeComplex { + template + EIGEN_DEVICE_FUNC + T operator() (const T& val) const { return val; } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const T& val) const { return std::complex(val, 0); } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const std::complex& val) const { return val; } +}; + +template struct PartOf { + template T operator() (const T& val) const { return val; } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.real(); } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.imag(); } +}; + +namespace internal { +template +struct traits > : public traits { + typedef traits XprTraits; + typedef typename NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename XprTraits::Scalar InputScalar; + typedef typename conditional::type OutputScalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> { + typedef const TensorFFTOp& type; +}; + +template +struct nested, 1, typename eval >::type> { + typedef TensorFFTOp type; +}; + +} // end namespace internal + +template +class TensorFFTOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename internal::conditional::type OutputScalar; + 
typedef OutputScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) + : m_xpr(expr), m_fft(fft) {} + + EIGEN_DEVICE_FUNC + const FFT& fft() const { return m_fft; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& expression() const { + return m_xpr; + } + + protected: + typename XprType::Nested m_xpr; + const FFT m_fft; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> { + typedef TensorFFTOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef internal::traits XprTraits; + typedef typename XprTraits::Scalar InputScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = true, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) { + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + 
m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + m_size = m_dimensions.TotalSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_dimensions; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (data) { + evalToBuf(data); + return false; + } else { + m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); + evalToBuf(m_data); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_data) { + m_device.deallocate(m_data); + m_data = NULL; + } + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { + return m_data[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType + packet(Index index) const { + return internal::ploadt(m_data + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { + const bool write_to_out = internal::is_same::value; + ComplexScalar* buf = write_to_out ? 
(ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); + + for (Index i = 0; i < m_size; ++i) { + buf[i] = MakeComplex::value>()(m_impl.coeff(i)); + } + + for (size_t i = 0; i < m_fft.size(); ++i) { + Index dim = m_fft[i]; + eigen_assert(dim >= 0 && dim < NumDims); + Index line_len = m_dimensions[dim]; + eigen_assert(line_len >= 1); + ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); + const bool is_power_of_two = isPowerOfTwo(line_len); + const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + + ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); + if (!is_power_of_two) { + // Compute twiddle factors + // t_n = exp(sqrt(-1) * pi * n^2 / line_len) + // for n = 0, 1,..., line_len-1. 
+ // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 + pos_j_base_powered[0] = ComplexScalar(1, 0); + if (line_len > 1) { + const RealScalar pi_over_len(EIGEN_PI / line_len); + const ComplexScalar pos_j_base = ComplexScalar( + std::cos(pi_over_len), std::sin(pi_over_len)); + pos_j_base_powered[1] = pos_j_base; + if (line_len > 2) { + const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; + for (int j = 2; j < line_len + 1; ++j) { + pos_j_base_powered[j] = pos_j_base_powered[j - 1] * + pos_j_base_powered[j - 1] / + pos_j_base_powered[j - 2] * pos_j_base_sq; + } + } + } + } + + for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { + const Index base_offset = getBaseOffsetFromIndex(partial_index, dim); + + // get data into line_buf + const Index stride = m_strides[dim]; + if (stride == 1) { + memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar)); + } else { + Index offset = base_offset; + for (int j = 0; j < line_len; ++j, offset += stride) { + line_buf[j] = buf[offset]; + } + } + + // process the line + if (is_power_of_two) { + processDataLineCooleyTukey(line_buf, line_len, log_len); + } + else { + processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); + } + + // write back + if (FFTDir == FFT_FORWARD && stride == 1) { + memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar)); + } else { + Index offset = base_offset; + const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0); + for (int j = 0; j < line_len; ++j, offset += stride) { + buf[offset] = (FFTDir == FFT_FORWARD) ?
line_buf[j] : line_buf[j] * div_factor; + } + } + } + m_device.deallocate(line_buf); + if (!is_power_of_two) { + m_device.deallocate(a); + m_device.deallocate(b); + m_device.deallocate(pos_j_base_powered); + } + } + + if(!write_to_out) { + for (Index i = 0; i < m_size; ++i) { + data[i] = PartOf()(buf[i]); + } + m_device.deallocate(buf); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { + eigen_assert(x > 0); + return !(x & (x - 1)); + } + + // The composite number for padding, used in Bluestein's FFT algorithm + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { + Index i = 2; + while (i < 2 * n - 1) i *= 2; + return i; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { + Index log2m = 0; + while (m >>= 1) log2m++; + return log2m; + } + + // Call Cooley Tukey algorithm directly, data length must be power of 2 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { + eigen_assert(isPowerOfTwo(line_len)); + scramble_FFT(line_buf, line_len); + compute_1D_Butterfly(line_buf, line_len, log_len); + } + + // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + Index n = line_len; + Index m = good_composite; + ComplexScalar* data = line_buf; + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + a[i] = data[i] * numext::conj(pos_j_base_powered[i]); + } + else { + a[i] = data[i] * pos_j_base_powered[i]; + } + } + for (Index i = n; i < m; ++i) { + a[i] = ComplexScalar(0, 0); + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[i]; + } + else { + b[i] = 
numext::conj(pos_j_base_powered[i]); + } + } + for (Index i = n; i < m - n; ++i) { + b[i] = ComplexScalar(0, 0); + } + for (Index i = m - n; i < m; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[m-i]; + } + else { + b[i] = numext::conj(pos_j_base_powered[m-i]); + } + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + scramble_FFT(b, m); + compute_1D_Butterfly(b, m, log_len); + + for (Index i = 0; i < m; ++i) { + a[i] *= b[i]; + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + //Do the scaling after ifft + for (Index i = 0; i < m; ++i) { + a[i] /= m; + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + data[i] = a[i] * numext::conj(pos_j_base_powered[i]); + } + else { + data[i] = a[i] * pos_j_base_powered[i]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { + eigen_assert(isPowerOfTwo(n)); + Index j = 1; + for (Index i = 1; i < n; ++i){ + if (j > i) { + std::swap(data[j-1], data[i-1]); + } + Index m = n >> 1; + while (m >= 2 && j > m) { + j -= m; + m >>= 1; + } + j += m; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) { + ComplexScalar tmp = data[1]; + data[1] = data[0] - data[1]; + data[0] += tmp; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) { + ComplexScalar tmp[4]; + tmp[0] = data[0] + data[1]; + tmp[1] = data[0] - data[1]; + tmp[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); + } else { + tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); + } + data[0] = tmp[0] + tmp[2]; + data[1] = tmp[1] + tmp[3]; + data[2] = tmp[0] - tmp[2]; + data[3] = tmp[1] - tmp[3]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) { + ComplexScalar tmp_1[8]; + ComplexScalar tmp_2[8]; + + tmp_1[0] = data[0] + data[1]; + tmp_1[1] = data[0] - 
data[1]; + tmp_1[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); + } else { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); + } + tmp_1[4] = data[4] + data[5]; + tmp_1[5] = data[4] - data[5]; + tmp_1[6] = data[6] + data[7]; + if (Dir == FFT_FORWARD) { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); + } else { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); + } + tmp_2[0] = tmp_1[0] + tmp_1[2]; + tmp_2[1] = tmp_1[1] + tmp_1[3]; + tmp_2[2] = tmp_1[0] - tmp_1[2]; + tmp_2[3] = tmp_1[1] - tmp_1[3]; + tmp_2[4] = tmp_1[4] + tmp_1[6]; +// SQRT2DIV2 = sqrt(2)/2 +#define SQRT2DIV2 0.7071067811865476 + if (Dir == FFT_FORWARD) { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); + } else { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); + } + data[0] = tmp_2[0] + tmp_2[4]; + data[1] = tmp_2[1] + tmp_2[5]; + data[2] = tmp_2[2] + tmp_2[6]; + data[3] = tmp_2[3] + tmp_2[7]; + data[4] = tmp_2[0] - tmp_2[4]; + data[5] = tmp_2[1] - tmp_2[5]; + data[6] = tmp_2[2] - tmp_2[6]; + data[7] = tmp_2[3] - tmp_2[7]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge( + ComplexScalar* data, Index n, Index n_power_of_2) { + // Original code: + // RealScalar wtemp = std::sin(M_PI/n); + // RealScalar wpi = -std::sin(2 * M_PI/n); + const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; + const RealScalar wpi = (Dir == FFT_FORWARD) + ? 
m_minus_sin_2_PI_div_n_LUT[n_power_of_2] + : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + + const ComplexScalar wp(wtemp, wpi); + const ComplexScalar wp_one = wp + ComplexScalar(1, 0); + const ComplexScalar wp_one_2 = wp_one * wp_one; + const ComplexScalar wp_one_3 = wp_one_2 * wp_one; + const ComplexScalar wp_one_4 = wp_one_3 * wp_one; + const Index n2 = n / 2; + ComplexScalar w(1.0, 0.0); + for (Index i = 0; i < n2; i += 4) { + ComplexScalar temp0(data[i + n2] * w); + ComplexScalar temp1(data[i + 1 + n2] * w * wp_one); + ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2); + ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3); + w = w * wp_one_4; + + data[i + n2] = data[i] - temp0; + data[i] += temp0; + + data[i + 1 + n2] = data[i + 1] - temp1; + data[i + 1] += temp1; + + data[i + 2 + n2] = data[i + 2] - temp2; + data[i + 2] += temp2; + + data[i + 3 + n2] = data[i + 3] - temp3; + data[i + 3] += temp3; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly( + ComplexScalar* data, Index n, Index n_power_of_2) { + eigen_assert(isPowerOfTwo(n)); + if (n > 8) { + compute_1D_Butterfly(data, n / 2, n_power_of_2 - 1); + compute_1D_Butterfly(data + n / 2, n / 2, n_power_of_2 - 1); + butterfly_1D_merge(data, n, n_power_of_2); + } else if (n == 8) { + butterfly_8(data); + } else if (n == 4) { + butterfly_4(data); + } else if (n == 2) { + butterfly_2(data); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { + Index result = 0; + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > omitted_dim; --i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + else { + for (Index i = 0; i < omitted_dim; ++i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + 
const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + // Value of index_coords[omitted_dim] is not determined to this step + return result; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { + Index result = base + offset * m_strides[omitted_dim] ; + return result; + } + + protected: + Index m_size; + const FFT& m_fft; + Dimensions m_dimensions; + array m_strides; + TensorEvaluator m_impl; + CoeffReturnType* m_data; + const Device& m_device; + + // This will support a maximum FFT size of 2^32 for each dimension + // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; + const RealScalar m_sin_PI_div_n_LUT[32] = { + RealScalar(0.0), + RealScalar(-2), + RealScalar(-0.999999999999999), + RealScalar(-0.292893218813453), + RealScalar(-0.0761204674887130), + RealScalar(-0.0192147195967696), + RealScalar(-0.00481527332780311), + RealScalar(-0.00120454379482761), + RealScalar(-3.01181303795779e-04), + RealScalar(-7.52981608554592e-05), + RealScalar(-1.88247173988574e-05), + RealScalar(-4.70619042382852e-06), + RealScalar(-1.17654829809007e-06), + RealScalar(-2.94137117780840e-07), + RealScalar(-7.35342821488550e-08), + RealScalar(-1.83835707061916e-08), + RealScalar(-4.59589268710903e-09), + RealScalar(-1.14897317243732e-09), + RealScalar(-2.87243293150586e-10), + RealScalar( -7.18108232902250e-11), + RealScalar(-1.79527058227174e-11), + RealScalar(-4.48817645568941e-12), + RealScalar(-1.12204411392298e-12), + RealScalar(-2.80511028480785e-13), + RealScalar(-7.01277571201985e-14), + RealScalar(-1.75319392800498e-14), + RealScalar(-4.38298482001247e-15), + RealScalar(-1.09574620500312e-15), + RealScalar(-2.73936551250781e-16), + RealScalar(-6.84841378126949e-17), + RealScalar(-1.71210344531737e-17), + RealScalar(-4.28025861329343e-18) + }; + + // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / 
std::pow(2,i)); + const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { + RealScalar(0.0), + RealScalar(0.0), + RealScalar(-1.00000000000000e+00), + RealScalar(-7.07106781186547e-01), + RealScalar(-3.82683432365090e-01), + RealScalar(-1.95090322016128e-01), + RealScalar(-9.80171403295606e-02), + RealScalar(-4.90676743274180e-02), + RealScalar(-2.45412285229123e-02), + RealScalar(-1.22715382857199e-02), + RealScalar(-6.13588464915448e-03), + RealScalar(-3.06795676296598e-03), + RealScalar(-1.53398018628477e-03), + RealScalar(-7.66990318742704e-04), + RealScalar(-3.83495187571396e-04), + RealScalar(-1.91747597310703e-04), + RealScalar(-9.58737990959773e-05), + RealScalar(-4.79368996030669e-05), + RealScalar(-2.39684498084182e-05), + RealScalar(-1.19842249050697e-05), + RealScalar(-5.99211245264243e-06), + RealScalar(-2.99605622633466e-06), + RealScalar(-1.49802811316901e-06), + RealScalar(-7.49014056584716e-07), + RealScalar(-3.74507028292384e-07), + RealScalar(-1.87253514146195e-07), + RealScalar(-9.36267570730981e-08), + RealScalar(-4.68133785365491e-08), + RealScalar(-2.34066892682746e-08), + RealScalar(-1.17033446341373e-08), + RealScalar(-5.85167231706864e-09), + RealScalar(-2.92583615853432e-09) + }; +}; + +} // end namespace Eigen + +#endif // EIGEN_HAS_CONSTEXPR + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h new file mode 100644 index 000000000..fcee5f60d --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -0,0 +1,389 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H +#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H + +namespace Eigen { + +/** \class TensorFixedSize + * \ingroup CXX11_Tensor_Module + * + * \brief The fixed sized version of the tensor class. + * + * The fixed sized equivalent of + * Eigen::Tensor t(3, 5, 7); + * is + * Eigen::TensorFixedSize> t; + */ + +template +class TensorFixedSize : public TensorBase > +{ + public: + typedef TensorFixedSize Self; + typedef TensorBase > Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef Scalar_ Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + static const int Options = Options_; + + enum { + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0), + Layout = Options_ & RowMajor ? RowMajor : ColMajor, + CoordAccess = true, + RawAccess = true + }; + + typedef Dimensions_ Dimensions; + static const std::size_t NumIndices = Dimensions::count; + + protected: + TensorStorage m_storage; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } + + // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // work, because that uses base().coeffRef() - and we don't yet + // implement a similar class hierarchy + inline Self& base() { return *this; } + inline const Self& base() const { return *this; } + +#if 
EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeff(array{{firstIndex, otherIndices...}}); + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(array{{firstIndex, otherIndices...}}); + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return this->operator()(array{{firstIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const 
+ { + if (Options&RowMajor) { + const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } +#endif + + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + eigen_assert(checkIndexRange(indices)); + return coeff(indices); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return coeff(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const + { + // The bracket operator is only for vectors, use the parenthesis operator instead. + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... 
otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return operator()(array{{firstIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + if (Options&RowMajor) { + const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return 
m_storage.data()[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + eigen_assert(checkIndexRange(indices)); + return coeffRef(indices); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator[](Index index) + { + // The bracket operator is only for vectors, use the parenthesis operator instead + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize() + : m_storage() + { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const Self& other) + : m_storage(other.m_storage) + { + } + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other) + : m_storage(other.m_storage) + { + } +#endif + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other) + { + // FIXME: check that the dimensions of other match the dimensions of *this. + // Unfortunately this isn't possible yet when the rhs is an expression. 
+ typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other) + { + // FIXME: check that the dimensions of other match the dimensions of *this. + // Unfortunately this isn't possible yet when the rhs is an expression. + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE bool checkIndexRange(const array& /*indices*/) const + { + using internal::array_apply_and_reduce; + using internal::array_zip_and_reduce; + using internal::greater_equal_zero_op; + using internal::logical_and_op; + using internal::lesser_op; + + return true; + // check whether the indices are all >= 0 + /* array_apply_and_reduce(indices) && + // check whether the indices fit in the dimensions + array_zip_and_reduce(indices, m_storage.dimensions());*/ + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index linearizedIndex(const array& indices) const + { + if (Options&RowMajor) { + return m_storage.dimensions().IndexOfRowMajor(indices); + } else { + return m_storage.dimensions().IndexOfColMajor(indices); + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h new file mode 100644 index 000000000..bbd5eb374 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H +#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H + +namespace Eigen { + +/** \class TensorForcedEval + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. +namespace internal { +template class MakePointer_> +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; + template struct MakePointer { + // Intermediate typedef to workaround MSVC issue. 
+ typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorForcedEvalOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorForcedEvalOp type; +}; + +} // end namespace internal + + + +template class MakePointer_> +class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + + +template class MakePointer_> +struct TensorEvaluator, Device> +{ + typedef TensorForcedEvalOp XprType; + typedef typename ArgType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = true, + PacketAccess = (PacketSize > 1), + Layout = TensorEvaluator::Layout, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + /// op_ is used for sycl + : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) + { } + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool 
evalSubExprsIfNeeded(CoeffReturnType*) { + const Index numValues = internal::array_prod(m_impl.dimensions()); + m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); + // Should initialize the memory in case we're dealing with non POD types. + if (NumTraits::RequireInitialization) { + for (Index i = 0; i < numValues; ++i) { + new(m_buffer+i) CoeffReturnType(); + } + } + typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; + EvalTo evalToTmp(m_buffer, m_op); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::type, PacketAccess>::run(evalToTmp, m_device); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_device.deallocate(m_buffer); + m_buffer = NULL; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_buffer[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return internal::ploadt(m_buffer + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC typename MakePointer::Type data() const { return m_buffer; } + + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator& impl() { return m_impl; } + /// used by sycl in order to build the sycl buffer + const Device& device() const{return m_device;} + private: + TensorEvaluator m_impl; + const ArgType m_op; + const Device& m_device; + typename MakePointer::Type m_buffer; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h new file mode 100644 index 000000000..52b803d7f --- /dev/null +++ 
b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -0,0 +1,109 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H + +namespace Eigen { + +// MakePointer class is used as a container of the adress space of the pointer +// on the host and on the device. From the host side it generates the T* pointer +// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to +// T* m_data on the host. It is always called on the device. +// Specialisation of MakePointer class for creating the sycl buffer with +// map_allocator. +template struct MakePointer { + typedef T* Type; +}; + +template class MakePointer_ = MakePointer> class TensorMap; +template class Tensor; +template class TensorFixedSize; +template class TensorRef; +template class TensorBase; + +template class TensorCwiseNullaryOp; +template class TensorCwiseUnaryOp; +template class TensorCwiseBinaryOp; +template class TensorCwiseTernaryOp; +template class TensorSelectOp; +template class MakePointer_ = MakePointer > class TensorReductionOp; +template class TensorIndexTupleOp; +template class TensorTupleReducerOp; +template class TensorConcatenationOp; +template class TensorContractionOp; +template class TensorConversionOp; +template class TensorConvolutionOp; +template class TensorFFTOp; +template class TensorPatchOp; +template class TensorImagePatchOp; +template class TensorVolumePatchOp; +template class TensorBroadcastingOp; +template class TensorChippingOp; +template class TensorReshapingOp; +template class TensorLayoutSwapOp; +template class TensorSlicingOp; 
+template class TensorReverseOp; +template class TensorPaddingOp; +template class TensorShufflingOp; +template class TensorStridingOp; +template class TensorStridingSlicingOp; +template class TensorInflationOp; +template class TensorGeneratorOp; +template class TensorAssignOp; +template class TensorScanOp; + +template class TensorCustomUnaryOp; +template class TensorCustomBinaryOp; + +template class MakePointer_ = MakePointer> class TensorEvalToOp; +template class MakePointer_ = MakePointer> class TensorForcedEvalOp; + +template class TensorDevice; +template struct TensorEvaluator; + +struct DefaultDevice; +struct ThreadPoolDevice; +struct GpuDevice; +struct SyclDevice; + +enum FFTResultType { + RealPart = 0, + ImagPart = 1, + BothParts = 2 +}; + +enum FFTDirection { + FFT_FORWARD = 0, + FFT_REVERSE = 1 +}; + + +namespace internal { + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess; +}; + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess && + TensorEvaluator::IsAligned; +}; + +template ::value> +class TensorExecutor; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h new file mode 100644 index 000000000..3b4f8eda1 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -0,0 +1,489 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H + +namespace Eigen { +namespace internal { + + +/** \internal + * \brief Template functor to compute the modulo between an array and a scalar. + */ +template +struct scalar_mod_op { + EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } + const Scalar m_divisor; +}; +template +struct functor_traits > +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; + + +/** \internal + * \brief Template functor to compute the modulo between 2 arrays. + */ +template +struct scalar_mod2_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op) + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } +}; +template +struct functor_traits > +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; + +template +struct scalar_fmod_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar + operator()(const Scalar& a, const Scalar& b) const { + return numext::fmod(a, b); + } +}; +template +struct functor_traits > { + enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell. 
+ PacketAccess = false }; +}; + + +/** \internal + * \brief Template functor to compute the sigmoid of a scalar + * \sa class CwiseUnaryOp, ArrayBase::sigmoid() + */ +template +struct scalar_sigmoid_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { + const T one = T(1); + return one / (one + numext::exp(-x)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(const Packet& x) const { + const Packet one = pset1(T(1)); + return pdiv(one, padd(one, pexp(pnegate(x)))); + } +}; + +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost * 2 + NumTraits::MulCost * 6, + PacketAccess = packet_traits::HasAdd && packet_traits::HasDiv && + packet_traits::HasNegate && packet_traits::HasExp + }; +}; + + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + +// Standard reduction functors +template struct SumReducer +{ + static const bool PacketAccess = packet_traits::HasAdd; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = padd(*accum, p); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(0); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_sum_op sum_op; + return sum_op(saccum, predux(vaccum)); 
+ } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + +template struct MeanReducer +{ + static const bool PacketAccess = packet_traits::HasAdd && !NumTraits::IsInteger; + static const bool IsStateful = true; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + MeanReducer() : scalarCount_(0), packetCount_(0) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); + scalarCount_++; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { + (*accum) = padd(*accum, p); + packetCount_++; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(0); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum / scalarCount_; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return pdiv(vaccum, pset1(packetCount_)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_sum_op sum_op; + return sum_op(saccum, predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits::size); + } + + protected: + DenseIndex scalarCount_; + DenseIndex packetCount_; +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::lowest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return -Eigen::NumTraits::infinity(); + } +}; +template +struct MinMaxBottomValue 
{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::highest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::infinity(); + } +}; + + +template struct MaxReducer +{ + static const bool PacketAccess = packet_traits::HasMax; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t > *accum) { *accum = t; } + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmax(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return MinMaxBottomValue::IsInteger>::bottom_value(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + return numext::maxi(saccum, predux_max(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMax + }; +}; + + +template struct MinReducer +{ + static const bool PacketAccess = packet_traits::HasMin; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t < *accum) { *accum = t; } + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmin(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return MinMaxBottomValue::IsInteger>::bottom_value(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet 
initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + return numext::mini(saccum, predux_min(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMin + }; +}; + + +template struct ProdReducer +{ + static const bool PacketAccess = packet_traits::HasMul; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + internal::scalar_product_op prod_op; + (*accum) = prod_op(*accum, t); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmul(*accum, p); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(1); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_product_op prod_op; + return prod_op(saccum, predux_mul(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::HasMul + }; +}; + + +struct AndReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + 
*accum = *accum && t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + +struct OrReducer { + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum || t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return false; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + +// Argmin/Argmax reducers +template struct ArgMaxTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t.second > accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::lowest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + +template struct ArgMinTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { + if (t.second < accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::highest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + +template +class 
GaussianGenerator { + public: + static const bool PacketAccess = false; + + EIGEN_DEVICE_FUNC GaussianGenerator(const array& means, + const array& std_devs) + : m_means(means) + { + for (size_t i = 0; i < NumDims; ++i) { + m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; + } + } + + EIGEN_DEVICE_FUNC T operator()(const array& coordinates) const { + T tmp = T(0); + for (size_t i = 0; i < NumDims; ++i) { + T offset = coordinates[i] - m_means[i]; + tmp += offset * offset / m_two_sigmas[i]; + } + return numext::exp(-tmp); + } + + private: + array m_means; + array m_two_sigmas; +}; + +template +struct functor_traits > { + enum { + Cost = NumDims * (2 * NumTraits::AddCost + NumTraits::MulCost + + functor_traits >::Cost) + + functor_traits >::Cost, + PacketAccess = GaussianGenerator::PacketAccess + }; +}; + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h new file mode 100644 index 000000000..eb1d4934e --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -0,0 +1,185 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H + +namespace Eigen { + +/** \class TensorGenerator + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor generator class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorGeneratorOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorGeneratorOp type; +}; + +} // end namespace internal + + + +template +class TensorGeneratorOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator) + : m_xpr(expr), m_generator(generator) {} + + EIGEN_DEVICE_FUNC + const Generator& generator() const { return m_generator; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Generator m_generator; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorGeneratorOp XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + enum { + IsAligned = false, + 
PacketAccess = (internal::unpacket_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_generator(op.generator()) + { + TensorEvaluator impl(op.expression(), device); + m_dimensions = impl.dimensions(); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + array coords; + extract_coordinates(index, coords); + return m_generator(coords); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool) const { + // TODO(rmlarsen): This is just a placeholder. Define interface to make + // generators return their cost. 
+ return TensorOpCost(0, 0, TensorOpCost::AddCost() + + TensorOpCost::MulCost()); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void extract_coordinates(Index index, array& coords) const { + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_strides[i]; + index -= idx * m_strides[i]; + coords[i] = idx; + } + coords[0] = index; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_strides[i]; + index -= idx * m_strides[i]; + coords[i] = idx; + } + coords[NumDims-1] = index; + } + } + + Dimensions m_dimensions; + array m_strides; + Generator m_generator; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h new file mode 100644 index 000000000..665b861cf --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a a, \a b, \a x) of the given tensors. + * + * This function computes the regularized incomplete beta function (integral). 
+ * + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const + TensorCwiseTernaryOp, + const ADerived, const BDerived, const XDerived> + betainc(const ADerived& a, const BDerived& b, const XDerived& x) { + return TensorCwiseTernaryOp< + internal::scalar_betainc_op, const ADerived, + const BDerived, const XDerived>( + a, b, x, internal::scalar_betainc_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h new file mode 100644 index 000000000..a901c5dd4 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H +#define EIGEN_CXX11_TENSOR_TENSOR_IO_H + +namespace Eigen { + +namespace internal { + +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } + } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + os << tensor.coeff(0); + } +}; +} + +template +std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + + // Evaluate the expression if needed + TensorForcedEvalOp eval = expr.eval(); + Evaluator tensor(eval, DefaultDevice()); + tensor.evalSubExprsIfNeeded(NULL); + + // Print the result + static const int rank = internal::array_size::value; + internal::TensorPrinter::run(os, tensor); + + // Cleanup. 
+ tensor.cleanup(); + return os; +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h new file mode 100644 index 000000000..566856ed2 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -0,0 +1,509 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H + +namespace Eigen { + +/** \class TensorImagePatch + * \ingroup CXX11_Tensor_Module + * + * \brief Patch extraction specialized for image processing. + * This assumes that the input has at least 3 dimensions ordered as follows: + * 1st dimension: channels (of size d) + * 2nd dimension: rows (of size r) + * 3rd dimension: columns (of size c) + * There can be additional dimensions such as time (for video) or batch (for + * bulk processing) after the first 3. + * Calling the image patch code with patch_rows and patch_cols is equivalent + * to calling the regular patch extraction code with parameters d, patch_rows, + * patch_cols, and 1 for all the additional dimensions. 
+ */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename internal::remove_const::type Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorImagePatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorImagePatchOp type; +}; + +} // end namespace internal + +template +class TensorImagePatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + PaddingType padding_type, Scalar padding_value) + : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_row_strides(row_strides), m_col_strides(col_strides), + m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), + m_padding_type(padding_type), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + DenseIndex padding_top, DenseIndex padding_bottom, + DenseIndex padding_left, DenseIndex padding_right, + Scalar padding_value) + : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_row_strides(row_strides), m_col_strides(col_strides), + m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom), + m_padding_left(padding_left), m_padding_right(padding_right), + m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + DenseIndex patch_rows() const { return m_patch_rows; } + EIGEN_DEVICE_FUNC + DenseIndex patch_cols() const { return m_patch_cols; } + EIGEN_DEVICE_FUNC + DenseIndex row_strides() const { return m_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_strides() const { return m_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_row_strides() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_col_strides() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } + EIGEN_DEVICE_FUNC + bool padding_explicit() const { return m_padding_explicit; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top() const { return m_padding_top; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom() const { return m_padding_bottom; } + EIGEN_DEVICE_FUNC + DenseIndex padding_left() const { return m_padding_left; } + EIGEN_DEVICE_FUNC + DenseIndex padding_right() const { return m_padding_right; } + 
EIGEN_DEVICE_FUNC + PaddingType padding_type() const { return m_padding_type; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const DenseIndex m_patch_rows; + const DenseIndex m_patch_cols; + const DenseIndex m_row_strides; + const DenseIndex m_col_strides; + const DenseIndex m_in_row_strides; + const DenseIndex m_in_col_strides; + const DenseIndex m_row_inflate_strides; + const DenseIndex m_col_inflate_strides; + const bool m_padding_explicit; + const DenseIndex m_padding_top; + const DenseIndex m_padding_bottom; + const DenseIndex m_padding_left; + const DenseIndex m_padding_right; + const PaddingType m_padding_type; + const Scalar m_padding_value; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorImagePatchOp XprType; + typedef typename XprType::Index Index; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims + 1; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef TensorEvaluator, + Device> Self; + typedef TensorEvaluator Impl; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + + m_paddingValue = op.padding_value(); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // Caches a few variables. 
+ if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputDepth = input_dims[0]; + m_inputRows = input_dims[1]; + m_inputCols = input_dims[2]; + } else { + m_inputDepth = input_dims[NumInputDims-1]; + m_inputRows = input_dims[NumInputDims-2]; + m_inputCols = input_dims[NumInputDims-3]; + } + + m_row_strides = op.row_strides(); + m_col_strides = op.col_strides(); + + // Input strides and effective input/patch size + m_in_row_strides = op.in_row_strides(); + m_in_col_strides = op.in_col_strides(); + m_row_inflate_strides = op.row_inflate_strides(); + m_col_inflate_strides = op.col_inflate_strides(); + // The "effective" input rows and input cols are the input rows and cols + // after inflating them with zeros. + // For example, a 2x3 matrix with row_inflate_strides and + // col_inflate_strides of 2 goes from: + // A B C + // D E F + // + // to a matrix that is 3 x 5: + // + // A . B . C + // . . . . . + // D . E . F + + m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; + m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; + m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); + m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); + + if (op.padding_explicit()) { + m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_rowPaddingTop = op.padding_top(); + m_colPaddingLeft = op.padding_left(); + } else { + // Computing padding from the type + switch (op.padding_type()) { + case PADDING_VALID: + m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + // Calculate the padding + 
m_rowPaddingTop = numext::maxi(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2); + m_colPaddingLeft = numext::maxi(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2); + break; + case PADDING_SAME: + m_outputRows = numext::ceil(m_input_rows_eff / static_cast(m_row_strides)); + m_outputCols = numext::ceil(m_input_cols_eff / static_cast(m_col_strides)); + // Calculate the padding + m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; + m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; + break; + default: + eigen_assert(false && "unexpected padding"); + } + } + eigen_assert(m_outputRows > 0); + eigen_assert(m_outputCols > 0); + + // Dimensions for result of extraction. + if (static_cast(Layout) == static_cast(ColMajor)) { + // ColMajor + // 0: depth + // 1: patch_rows + // 2: patch_cols + // 3: number of patches + // 4 and beyond: anything else (such as batch). + m_dimensions[0] = input_dims[0]; + m_dimensions[1] = op.patch_rows(); + m_dimensions[2] = op.patch_cols(); + m_dimensions[3] = m_outputRows * m_outputCols; + for (int i = 4; i < NumDims; ++i) { + m_dimensions[i] = input_dims[i-1]; + } + } else { + // RowMajor + // NumDims-1: depth + // NumDims-2: patch_rows + // NumDims-3: patch_cols + // NumDims-4: number of patches + // NumDims-5 and beyond: anything else (such as batch). + m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; + m_dimensions[NumDims-2] = op.patch_rows(); + m_dimensions[NumDims-3] = op.patch_cols(); + m_dimensions[NumDims-4] = m_outputRows * m_outputCols; + for (int i = NumDims-5; i >= 0; --i) { + m_dimensions[i] = input_dims[i]; + } + } + + // Strides for moving the patch in various dimensions. 
+ if (static_cast(Layout) == static_cast(ColMajor)) { + m_colStride = m_dimensions[1]; + m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; + m_otherStride = m_patchStride * m_dimensions[3]; + } else { + m_colStride = m_dimensions[NumDims-2]; + m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; + m_otherStride = m_patchStride * m_dimensions[NumDims-4]; + } + + // Strides for navigating through the input tensor. + m_rowInputStride = m_inputDepth; + m_colInputStride = m_inputDepth * m_inputRows; + m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols; + + // Fast representations of different variables. + m_fastOtherStride = internal::TensorIntDivisor(m_otherStride); + m_fastPatchStride = internal::TensorIntDivisor(m_patchStride); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + m_fastInflateRowStride = internal::TensorIntDivisor(m_row_inflate_strides); + m_fastInflateColStride = internal::TensorIntDivisor(m_col_inflate_strides); + m_fastInputColsEff = internal::TensorIntDivisor(m_input_cols_eff); + + // Number of patches in the width dimension. + m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[0]); + } else { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[NumDims-1]); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Patch index corresponding to the passed in index. 
+ const Index patchIndex = index / m_fastPatchStride; + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; + + // Other ways to index this element. + const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride; + const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; + + // Calculate col index in the input original tensor. + const Index colIndex = patch2DIndex / m_fastOutputRows; + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; + const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0); + if (inputCol < 0 || inputCol >= m_input_cols_eff || + ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate row index in the original input tensor. + const Index rowIndex = patch2DIndex - colIndex * m_outputRows; + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; + const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInflateRowStride) : 0); + if (inputRow < 0 || inputRow >= m_input_rows_eff || + ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 
0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + + const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride; + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { + return packetWithPossibleZero(index); + } + + const Index indices[2] = {index, index + PacketSize - 1}; + const Index patchIndex = indices[0] / m_fastPatchStride; + if (patchIndex != indices[1] / m_fastPatchStride) { + return packetWithPossibleZero(index); + } + const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride; + eigen_assert(otherIndex == indices[1] / m_fastOtherStride); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, + (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; + + const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; + eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); + + const Index colIndex = patch2DIndex / m_fastOutputRows; + const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; + + // Calculate col indices in the original input tensor. 
+ const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - + m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; + if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputCols[0] == inputCols[1]) { + const Index rowIndex = patch2DIndex - colIndex * m_outputRows; + const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + // Calculate row indices in the original input tensor. + const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - + m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; + + if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { + // no padding + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 
0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; + return m_impl.template packet(inputIndex); + } + } + + return packetWithPossibleZero(index); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + Index rowPaddingTop() const { return m_rowPaddingTop; } + Index colPaddingLeft() const { return m_colPaddingLeft; } + Index outputRows() const { return m_outputRows; } + Index outputCols() const { return m_outputCols; } + Index userRowStride() const { return m_row_strides; } + Index userColStride() const { return m_col_strides; } + Index userInRowStride() const { return m_in_row_strides; } + Index userInColStride() const { return m_in_col_strides; } + Index rowInflateStride() const { return m_row_inflate_strides; } + Index colInflateStride() const { return m_col_inflate_strides; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // We conservatively estimate the cost for the code path where the computed + // index is inside the original image and + // TensorEvaluator::CoordAccess is false. 
+ const double compute_cost = 3 * TensorOpCost::DivCost() + + 6 * TensorOpCost::MulCost() + + 8 * TensorOpCost::MulCost(); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + + Index m_otherStride; + Index m_patchStride; + Index m_colStride; + Index m_row_strides; + Index m_col_strides; + + Index m_in_row_strides; + Index m_in_col_strides; + Index m_row_inflate_strides; + Index m_col_inflate_strides; + + Index m_input_rows_eff; + Index m_input_cols_eff; + Index m_patch_rows_eff; + Index m_patch_cols_eff; + + internal::TensorIntDivisor m_fastOtherStride; + internal::TensorIntDivisor m_fastPatchStride; + internal::TensorIntDivisor m_fastColStride; + internal::TensorIntDivisor m_fastInflateRowStride; + internal::TensorIntDivisor m_fastInflateColStride; + internal::TensorIntDivisor m_fastInputColsEff; + + Index m_rowInputStride; + Index m_colInputStride; + Index m_patchInputStride; + + Index m_inputDepth; + Index m_inputRows; + Index m_inputCols; + + Index m_outputRows; + Index m_outputCols; + + Index m_rowPaddingTop; + Index m_colPaddingLeft; + + internal::TensorIntDivisor m_fastOutputRows; + internal::TensorIntDivisor m_fastOutputDepth; + + Scalar m_paddingValue; + + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h new file mode 100644 index 000000000..3209fecd3 --- /dev/null +++ 
b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -0,0 +1,725 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H + + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES + +#define EIGEN_HAS_INDEX_LIST + +namespace Eigen { + +/** \internal + * + * \class TensorIndexList + * \ingroup CXX11_Tensor_Module + * + * \brief Set of classes used to encode a set of Tensor dimensions/indices. + * + * The indices in the list can be known at compile time or at runtime. A mix + * of static and dynamic indices can also be provided if needed. The tensor + * code will attempt to take advantage of the indices that are known at + * compile time to optimize the code it generates. + * + * This functionality requires a c++11 compliant compiler. If your compiler + * is older you need to use arrays of indices instead. + * + * Several examples are provided in the cxx11_tensor_index_list.cpp file. + * + * \sa Tensor + */ + +template +struct type2index { + static const DenseIndex value = n; + EIGEN_DEVICE_FUNC constexpr operator DenseIndex() const { return n; } + EIGEN_DEVICE_FUNC void set(DenseIndex val) { + eigen_assert(val == n); + } +}; + +// This can be used with IndexPairList to get compile-time constant pairs, +// such as IndexPairList, type2indexpair<3,4>>(). 
+template +struct type2indexpair { + static const DenseIndex first = f; + static const DenseIndex second = s; + + constexpr EIGEN_DEVICE_FUNC operator IndexPair() const { + return IndexPair(f, s); + } + + EIGEN_DEVICE_FUNC void set(const IndexPair& val) { + eigen_assert(val.first == f); + eigen_assert(val.second == s); + } +}; + + +template struct NumTraits > +{ + typedef DenseIndex Real; + enum { + IsComplex = 0, + RequireInitialization = false, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; } + EIGEN_DEVICE_FUNC static inline Real highest() { return n; } + EIGEN_DEVICE_FUNC static inline Real lowest() { return n; } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { + val = new_val; +} +template +EIGEN_DEVICE_FUNC void update_value(type2index& val, DenseIndex new_val) { + val.set(new_val); +} + +template +EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair new_val) { + val = new_val; +} +template +EIGEN_DEVICE_FUNC void update_value(type2indexpair& val, IndexPair new_val) { + val.set(new_val); +} + + +template +struct is_compile_time_constant { + static constexpr bool value = false; +}; + +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; + +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool 
value = true; +}; + + +template +struct IndexTuple; + +template +struct IndexTuple { + EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() { } + EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) { } + + constexpr static int count = 1 + sizeof...(O); + T head; + IndexTuple others; + typedef T Head; + typedef IndexTuple Other; +}; + +template + struct IndexTuple { + EIGEN_DEVICE_FUNC constexpr IndexTuple() : head() { } + EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v) : head(v) { } + + constexpr static int count = 1; + T head; + typedef T Head; +}; + + +template +struct IndexTupleExtractor; + +template +struct IndexTupleExtractor { + + typedef typename IndexTupleExtractor::ValType ValType; + + EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + + EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + template + EIGEN_DEVICE_FUNC static void set_val(IndexTuple& val, V& new_val) { + IndexTupleExtractor::set_val(val.others, new_val); + } + +}; + +template + struct IndexTupleExtractor<0, T, O...> { + + typedef T ValType; + + EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple& val) { + return val.head; + } + EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple& val) { + return val.head; + } + template + EIGEN_DEVICE_FUNC static void set_val(IndexTuple& val, V& new_val) { + val.head = new_val; + } +}; + + + +template +EIGEN_DEVICE_FUNC constexpr typename IndexTupleExtractor::ValType& array_get(IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template +EIGEN_DEVICE_FUNC constexpr const typename IndexTupleExtractor::ValType& array_get(const IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; +template + struct 
array_size > { + static const size_t value = IndexTuple::count; +}; + + + + +template +struct tuple_coeff { + template + EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex i, const IndexTuple& t) { + // return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + return (i == Idx ? array_get(t) : tuple_coeff::get(i, t)); + } + template + EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const ValueT& value) { + if (i == Idx) { + update_value(array_get(t), value); + } else { + tuple_coeff::set(i, t, value); + } + } + + template + EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple& t) { + return ((i == Idx) & is_compile_time_constant::ValType>::value) || + tuple_coeff::value_known_statically(i, t); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + tuple_coeff::values_up_to_known_statically(t); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + is_compile_time_constant::ValType>::value && + array_get(t) > array_get(t) && + tuple_coeff::values_up_to_statically_known_to_increase(t); + } +}; + +template +struct tuple_coeff<0, ValueT> { + template + EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex /*i*/, const IndexTuple& t) { + // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr + return array_get<0>(t)/* * (i == 0)*/; + } + template + EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const ValueT value) { + eigen_assert (i == 0); + update_value(array_get<0>(t), value); + } + template + EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple&) { + return is_compile_time_constant::ValType>::value & (i == 0); + } + + template + 
EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple&) { + return is_compile_time_constant::ValType>::value; + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple&) { + return true; + } +}; +} // namespace internal + + + +template +struct IndexList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { + return internal::tuple_coeff >::value-1, DenseIndex>::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple(first, other...) { } + EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::value_known_statically(i, *this); + } + EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const { + return internal::tuple_coeff >::value-1, DenseIndex>::values_up_to_known_statically(*this); + } + + EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const { + return internal::tuple_coeff >::value-1, DenseIndex>::values_up_to_statically_known_to_increase(*this); + } +}; + + +template +constexpr IndexList make_index_list(FirstType val1, OtherTypes... 
other_vals) { + return IndexList(val1, other_vals...); +} + + +template +struct IndexPairList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair operator[] (const DenseIndex i) const { + return internal::tuple_coeff >::value-1, IndexPair>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const IndexPair value) { + return internal::tuple_coeff>::value-1, IndexPair >::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexPairList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexPairList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::value_known_statically(i, *this); + } +}; + +namespace internal { + +template size_t array_prod(const IndexList& sizes) { + size_t result = 1; + for (int i = 0; i < array_size >::value; ++i) { + result *= sizes[i]; + } + return result; +} + +template struct array_size > { + static const size_t value = array_size >::value; +}; +template struct array_size > { + static const size_t value = array_size >::value; +}; + +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; + +template EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(IndexList& a) { + return IndexTupleExtractor::get_val(a); +} +template EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(const IndexList& a) { + return IndexTupleExtractor::get_val(a); +} + +template +struct index_known_statically_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return false; + } +}; + +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { + return IndexList().value_known_statically(i); + } +}; + +template +struct 
index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { + return IndexList().value_known_statically(i); + } +}; + + +template +struct all_indices_known_statically_impl { + static constexpr bool run() { + return false; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return IndexList().all_values_known_statically(); + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return IndexList().all_values_known_statically(); + } +}; + + +template +struct indices_statically_known_to_increase_impl { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return false; + } +}; + +template + struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); + } +}; + +template + struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); + } +}; + + +template +struct index_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) == value); + } +}; + +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) == value); + } +}; + + +template +struct index_statically_ne_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, 
const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) != value); + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) != value); + } +}; + + +template +struct index_statically_gt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) > value); + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) > value); + } +}; + + + +template +struct index_statically_lt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + + + +template +struct index_pair_first_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + +template +struct 
index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + + + +template +struct index_pair_second_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + + +} // end namespace internal +} // end namespace Eigen + +#else + +namespace Eigen { +namespace internal { + +template +struct index_known_statically_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return false; + } +}; + +template +struct all_indices_known_statically_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return false; + } +}; + +template +struct indices_statically_known_to_increase_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return false; + } +}; + +template +struct index_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl { + static 
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + + + +} // end namespace internal +} // end namespace Eigen + +#endif + + +namespace Eigen { +namespace internal { +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) { + return index_known_statically_impl::run(i); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { + return all_indices_known_statically_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { + return indices_statically_known_to_increase_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) { + return index_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) { + return index_statically_ne_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) { + return index_statically_gt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) { + return index_statically_lt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(DenseIndex i, DenseIndex value) { + return index_pair_first_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(DenseIndex i, DenseIndex value) { + return 
index_pair_second_statically_eq_impl::run(i, value); +} + +} // end namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h new file mode 100644 index 000000000..f391fb9ee --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -0,0 +1,229 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Ke Yang +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H + +namespace Eigen { + +/** \class TensorInflation + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor inflation class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorInflationOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorInflationOp type; +}; + +} // end namespace internal + +template +class TensorInflationOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides) + : m_xpr(expr), m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const Strides& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorInflationOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + 
enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_strides(op.strides()) + { + m_dimensions = m_impl.dimensions(); + // Expand each dimension to the inflated dimension. + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1; + } + + // Remember the strides for fast division. + for (int i = 0; i < NumDims; ++i) { + m_fastStrides[i] = internal::TensorIntDivisor(m_strides[i]); + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + } + } else { // RowMajor + m_outputStrides[NumDims-1] = 1; + m_inputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + // Computes the input index given the output index. Returns true if the output + // index doesn't fall into a hole. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const + { + eigen_assert(index < dimensions().TotalSize()); + *inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (idx != idx / m_fastStrides[i] * m_strides[i]) { + return false; + } + *inputIndex += idx / m_strides[i] * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (index != index / m_fastStrides[0] * m_strides[0]) { + return false; + } + *inputIndex += index / m_strides[0]; + return true; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (idx != idx / m_fastStrides[i] * m_strides[i]) { + return false; + } + *inputIndex += idx / m_strides[i] * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) { + return false; + } + *inputIndex += index / m_strides[NumDims - 1]; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Index inputIndex = 0; + if (getInputIndex(index, &inputIndex)) { + return m_impl.coeff(inputIndex); + } else { + return Scalar(0); + } + } + + // TODO(yangke): optimize this function so that we can detect and produce + // all-zero packets + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (3 * 
TensorOpCost::DivCost() + + 3 * TensorOpCost::MulCost() + + 2 * TensorOpCost::AddCost()); + const double input_size = m_impl.dimensions().TotalSize(); + const double output_size = m_dimensions.TotalSize(); + if (output_size == 0) + return TensorOpCost(); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0, + compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Strides m_strides; + array, NumDims> m_fastStrides; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h new file mode 100644 index 000000000..33edc49e3 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h @@ -0,0 +1,82 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H +#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H + +#if EIGEN_HAS_VARIADIC_TEMPLATES + +#include + +namespace Eigen { + +/** \class TensorInitializer + * \ingroup CXX11_Tensor_Module + * + * \brief Helper template to initialize Tensors from std::initializer_lists. 
+ */ +namespace internal { + +template +struct Initializer { + typedef std::initializer_list< + typename Initializer::InitList> InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>* indices, + const InitList& vals) { + int i = 0; + for (auto v : vals) { + (*indices)[traits::NumDimensions - N] = i++; + Initializer::run(tensor, indices, v); + } + } +}; + +template +struct Initializer { + typedef std::initializer_list::Scalar> InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>* indices, + const InitList& vals) { + int i = 0; + // There is likely a faster way to do that than iterating. + for (auto v : vals) { + (*indices)[traits::NumDimensions - 1] = i++; + tensor.coeffRef(*indices) = v; + } + } +}; + +template +struct Initializer { + typedef typename traits::Scalar InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>*, + const InitList& v) { + tensor.coeffRef(0) = v; + } +}; + + +template +void initialize_tensor(TensorEvaluator& tensor, + const typename Initializer::NumDimensions>::InitList& vals) { + Eigen::array::Index, traits::NumDimensions> indices; + Initializer::NumDimensions>::run(tensor, &indices, vals); +} + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_HAS_VARIADIC_TEMPLATES + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h new file mode 100644 index 000000000..ede3939c2 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -0,0 +1,253 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H +#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H + + +namespace Eigen { + +/** \internal + * + * \class TensorIntDiv + * \ingroup CXX11_Tensor_Module + * + * \brief Fast integer division by a constant. + * + * See the paper from Granlund and Montgomery for explanation. + * (at http://dx.doi.org/10.1145/773473.178249) + * + * \sa Tensor + */ + +namespace internal { + +namespace { + + // Note: result is undefined if val == 0 + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef __CUDA_ARCH__ + return __clz(val); +#elif EIGEN_COMP_MSVC + unsigned long index; + _BitScanReverse(&index, val); + return 31 - index; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clz(static_cast(val)); +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef __CUDA_ARCH__ + return __clzll(val); +#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 + unsigned long index; + _BitScanReverse64(&index, val); + return 63 - index; +#elif EIGEN_COMP_MSVC + // MSVC's _BitScanReverse64 is not available for 32bits builds. 
+ unsigned int lo = (unsigned int)(val&0xffffffff); + unsigned int hi = (unsigned int)((val>>32)&0xffffffff); + int n; + if(hi==0) + n = 32 + count_leading_zeros(lo); + else + n = count_leading_zeros(hi); + return n; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clzll(static_cast(val)); +#endif + } + + template + struct UnsignedTraits { + typedef typename conditional::type type; + }; + + template + struct DividerTraits { + typedef typename UnsignedTraits::type type; + static const int N = sizeof(T) * 8; + }; + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { +#if defined(__CUDA_ARCH__) + return __umulhi(a, b); +#else + return (static_cast(a) * b) >> 32; +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { +#if defined(__CUDA_ARCH__) + return __umul64hi(a, b); +#elif defined(__SIZEOF_INT128__) + __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); + return static_cast(v >> 64); +#else + return (TensorUInt128, uint64_t>(a) * TensorUInt128, uint64_t>(b)).upper(); +#endif + } + + template + struct DividerHelper { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { + EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); + return static_cast((static_cast(1) << (N+log_div)) / divider - (static_cast(1) << N) + 1); + } + }; + + template + struct DividerHelper<64, T> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { +#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) + return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); +#else + const uint64_t shift = 1ULL << log_div; + TensorUInt128 result = TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) + 
- TensorUInt128, static_val<0> >(1, 0) + + TensorUInt128, static_val<1> >(1); + return static_cast(result); +#endif + } + }; +} + + +template +struct TensorIntDivisor { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { + multiplier = 0; + shift1 = 0; + shift2 = 0; + } + + // Must have 0 < divider < 2^31. This is relaxed to + // 0 < divider < 2^63 when using 64-bit indices on platforms that support + // the __uint128_t type. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { + const int N = DividerTraits::N; + eigen_assert(static_cast::type>(divider) < NumTraits::highest()/2); + eigen_assert(divider > 0); + + // fast ln2 + const int leading_zeros = count_leading_zeros(static_cast(divider)); + int log_div = N - leading_zeros; + // if divider is a power of two then log_div is 1 more than it should be. + if ((static_cast::type>(1) << (log_div-1)) == static_cast::type>(divider)) + log_div--; + + multiplier = DividerHelper::computeMultiplier(log_div, divider); + shift1 = log_div > 1 ? 1 : log_div; + shift2 = log_div > 1 ? log_div-1 : 0; + } + + // Must have 0 <= numerator. On platforms that dont support the __uint128_t + // type numerator should also be less than 2^32-1. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { + eigen_assert(static_cast::type>(numerator) < NumTraits::highest()/2); + //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above + + UnsignedType t1 = muluh(multiplier, numerator); + UnsignedType t = (static_cast(numerator) - t1) >> shift1; + return (t1 + t) >> shift2; + } + + private: + typedef typename DividerTraits::type UnsignedType; + UnsignedType multiplier; + int32_t shift1; + int32_t shift2; +}; + + +// Optimized version for signed 32 bit integers. +// Derived from Hacker's Delight. 
+// Only works for divisors strictly greater than one +template <> +class TensorIntDivisor { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { + magic = 0; + shift = 0; + } + // Must have 2 <= divider + EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) { + eigen_assert(divider >= 2); + calcMagic(divider); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { +#ifdef __CUDA_ARCH__ + return (__umulhi(magic, n) >> shift); +#else + uint64_t v = static_cast(magic) * static_cast(n); + return (static_cast(v >> 32) >> shift); +#endif + } + +private: + // Compute the magic numbers. See Hacker's Delight section 10 for an in + // depth explanation. + EIGEN_DEVICE_FUNC void calcMagic(int32_t d) { + const unsigned two31 = 0x80000000; // 2**31. + unsigned ad = d; + unsigned t = two31 + (ad >> 31); + unsigned anc = t - 1 - t%ad; // Absolute value of nc. + int p = 31; // Init. p. + unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|. + unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). + unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|. + unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). + unsigned delta = 0; + do { + p = p + 1; + q1 = 2*q1; // Update q1 = 2**p/|nc|. + r1 = 2*r1; // Update r1 = rem(2**p, |nc|). + if (r1 >= anc) { // (Must be an unsigned + q1 = q1 + 1; // comparison here). + r1 = r1 - anc;} + q2 = 2*q2; // Update q2 = 2**p/|d|. + r2 = 2*r2; // Update r2 = rem(2**p, |d|). + if (r2 >= ad) { // (Must be an unsigned + q2 = q2 + 1; // comparison here). 
+ r2 = r2 - ad;} + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + magic = (unsigned)(q2 + 1); + shift = p - 32; + } + + uint32_t magic; + int32_t shift; +}; + + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { + return divisor.divide(numerator); +} + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h new file mode 100644 index 000000000..cd0109ef4 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -0,0 +1,209 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H +#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H + +namespace Eigen { + +/** \class TensorLayoutSwap + * \ingroup CXX11_Tensor_Module + * + * \brief Swap the layout from col-major to row-major, or row-major + * to col-major, and invert the order of the dimensions. + * + * Beware: the dimensions are reversed by this operation. If you want to + * preserve the ordering of the dimensions, you need to combine this + * operation with a shuffle. 
+ * + * \example: + * Tensor input(2, 4); + * Tensor output = input.swap_layout(); + * eigen_assert(output.dimension(0) == 4); + * eigen_assert(output.dimension(1) == 2); + * + * array shuffle(1, 0); + * output = input.swap_layout().shuffle(shuffle); + * eigen_assert(output.dimension(0) == 2); + * eigen_assert(output.dimension(1) == 4); + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = (traits::Layout == ColMajor) ? RowMajor : ColMajor; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorLayoutSwapOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorLayoutSwapOp type; +}; + +} // end namespace internal + + + +template +class TensorLayoutSwapOp : public TensorBase, WriteAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + 
EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorLayoutSwapOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? RowMajor : ColMajor, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + for(int i = 0; i < NumDims; ++i) { + m_dimensions[i] = m_impl.dimensions()[NumDims-1-i]; + } + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + return m_impl.evalSubExprsIfNeeded(data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_impl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return 
m_impl.data(); } + + const TensorEvaluator& impl() const { return m_impl; } + + protected: + TensorEvaluator m_impl; + Dimensions m_dimensions; +}; + + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorLayoutSwapOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? RowMajor : ColMajor, + CoordAccess = false // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(index); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + this->m_impl.template writePacket(index, x); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h new file mode 100644 index 000000000..ee0078bbc --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H + + +/** use this macro in sfinae selection in templated functions + * + * template::value , int >::type = 0 + * > + * void foo(){} + * + * becomes => + * + * template::value ) + * > + * void foo(){} + */ + +// SFINAE requires variadic templates +#ifndef __CUDACC__ +#if EIGEN_HAS_VARIADIC_TEMPLATES + // SFINAE doesn't work for gcc <= 4.7 + #ifdef EIGEN_COMP_GNUC + #if EIGEN_GNUC_AT_LEAST(4,8) + #define EIGEN_HAS_SFINAE + #endif + #else + #define EIGEN_HAS_SFINAE + #endif +#endif +#endif + +#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ + typename internal::enable_if< ( __condition__ ) , int >::type = 0 + + +#if EIGEN_HAS_CONSTEXPR +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + + +#endif diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h new file mode 100644 index 000000000..a8e55757e --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -0,0 +1,321 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H +#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H + +namespace Eigen { + +/** \class TensorMap + * \ingroup CXX11_Tensor_Module + * + * \brief A tensor expression mapping an existing array of data. + * + */ +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. 
+/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. +template class MakePointer_> class TensorMap : public TensorBase > +{ + public: + typedef TensorMap Self; + typedef typename PlainObjectType::Base Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + /* typedef typename internal::conditional< + bool(internal::is_lvalue::value), + Scalar *, + const Scalar *>::type + PointerType;*/ + typedef typename MakePointer_::Type PointerType; + typedef PointerType PointerArgType; + + static const int Options = Options_; + + static const Index NumIndices = PlainObjectType::NumIndices; + typedef typename PlainObjectType::Dimensions Dimensions; + + enum { + IsAligned = ((int(Options_)&Aligned)==Aligned), + Layout = PlainObjectType::Layout, + CoordAccess = true, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. 
+ EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) { + EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) { + EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) { + EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) { + EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array& dimensions) + : m_data(dataPtr), m_dimensions(dimensions) + { } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) + : m_data(dataPtr), m_dimensions(dimensions) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) + : 
m_data(tensor.data()), m_dimensions(tensor.dimensions()) + { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PointerType data() { return m_data; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + // eigen_assert(checkIndexRange(indices)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(indices); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(indices); + return m_data[index]; + } + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_data[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... 
otherIndices) const + { + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i1 + i0 * m_dimensions[1]; + return m_data[index]; + } else { + const Index index = i0 + i1 * m_dimensions[0]; + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); + 
return m_data[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + // eigen_assert(checkIndexRange(indices)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(indices); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(indices); + return m_data[index]; + } + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_data[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + const std::size_t NumDims = sizeof...(otherIndices) + 2; + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i1 + i0 * m_dimensions[1]; + return m_data[index]; + } else { + const Index index = i0 + i1 * m_dimensions[0]; + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); + return m_data[index]; 
+ } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); + return m_data[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Self& operator=(const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + private: + typename MakePointer_::Type m_data; + Dimensions m_dimensions; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h new file mode 100644 index 000000000..615559d44 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -0,0 +1,218 @@ +// This file is part of Eigen, a lightweight C++ template library +// 
for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_H + +namespace Eigen { + +template struct Cond {}; + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T1& choose(Cond, const T1& first, const T2&) { + return first; +} + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T2& choose(Cond, const T1&, const T2& second) { + return second; +} + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const X x, const Y y) { + return static_cast((x + y - 1) / y); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const T x, const T y) { + return static_cast((x + y - 1) / y); +} + +template struct max_n_1 { + static const size_t size = n; +}; +template <> struct max_n_1<0> { + static const size_t size = 1; +}; + + +// Default packet types +template +struct PacketType : internal::packet_traits { + typedef typename internal::packet_traits::type type; +}; + +// For CUDA packet types when using a GpuDevice +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) +template <> +struct PacketType { + typedef half2 type; + static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; +}; +#endif + +#if defined(EIGEN_USE_SYCL) +template + struct PacketType { + typedef T type; + static const int size = 1; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasArg = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + 
HasSetLinear = 0, + HasBlend = 0 + }; +}; +#endif + + +// Tuple mimics std::pair but works on e.g. nvcc. +template struct Tuple { + public: + U first; + V second; + + typedef U first_type; + typedef V second_type; + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple() : first(), second() {} + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple(const U& f, const V& s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple& operator= (const Tuple& rhs) { + if (&rhs == this) return *this; + first = rhs.first; + second = rhs.second; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void swap(Tuple& rhs) { + using numext::swap; + swap(first, rhs.first); + swap(second, rhs.second); + } +}; + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator==(const Tuple& x, const Tuple& y) { + return (x.first == y.first && x.second == y.second); +} + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator!=(const Tuple& x, const Tuple& y) { + return !(x == y); +} + + +// Can't use std::pairs on cuda devices +template struct IndexPair { + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {} + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC void set(IndexPair val) { + first = val.first; + second = val.second; + } + + Idx first; + Idx second; +}; + + +#ifdef EIGEN_HAS_SFINAE +namespace internal { + + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx, numeric_list) { + return { idx[Is]... 
}; + } + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType&, numeric_list) { + return array(); + } + + /** Make an array (for index/dimensions) out of a custom index */ + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx) { + return customIndices2Array(idx, typename gen_numeric_list::type{}); + } + + + template + struct is_base_of + { + + typedef char (&yes)[1]; + typedef char (&no)[2]; + + template + struct Host + { + operator BB*() const; + operator DD*(); + }; + + template + static yes check(D*, T); + static no check(B*, int); + + static const bool value = sizeof(check(Host(), int())) == sizeof(yes); + }; + +} +#endif + + + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h new file mode 100644 index 000000000..284f29345 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -0,0 +1,905 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H +#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H + +namespace Eigen { + +/** \class TensorReshaping + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorReshapingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorReshapingOp type; +}; + +} // end namespace internal + + + +template +class TensorReshapingOp : public TensorBase, WriteAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) + : m_xpr(expr), m_dims(dims) {} + + EIGEN_DEVICE_FUNC + const NewDimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const NewDimensions m_dims; +}; + + +// Eval 
as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorReshapingOp XprType; + typedef NewDimensions Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dimensions(op.dimensions()) + { + // The total size of the reshaped tensor must be equal to the total size + // of the input tensor. + eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + return m_impl.evalSubExprsIfNeeded(data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_impl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast(m_impl.data()); } + + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } + + protected: + TensorEvaluator m_impl; + NewDimensions m_dimensions; +}; + + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> + 
+{ + typedef TensorEvaluator, Device> Base; + typedef TensorReshapingOp XprType; + typedef NewDimensions Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(index); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + this->m_impl.template writePacket(index, x); + } +}; + + +/** \class TensorSlicing + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor slicing class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorSlicingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorSlicingOp type; +}; + +} // end namespace internal + + + +template +class TensorSlicingOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) + : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_indices; } + EIGEN_DEVICE_FUNC + const Sizes& sizes() const { return m_sizes; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + + 
protected: + typename XprType::Nested m_xpr; + const StartIndices m_indices; + const Sizes m_sizes; +}; + + +// Fixme: figure out the exact threshold +namespace { +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { } + EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; } + + private: + Index threshold_; +}; + +// It is very expensive to start the memcpy kernel on GPU: we therefore only +// use it for large copies. +#ifdef EIGEN_USE_GPU +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { } + EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } +}; +#endif + +// It is very expensive to start the memcpy kernel on GPU: we therefore only +// use it for large copies. +#ifdef EIGEN_USE_SYCL +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { } + EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } +}; +#endif + +} + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. 
+ IsAligned = /*TensorEvaluator::IsAligned*/false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) + { + for (std::size_t i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const Sizes& output_dims = op.sizes(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i]); + } + } else { + m_inputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + } + + // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed. 
+ m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i]); + } + } + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Sizes Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (!NumTraits::type>::RequireInitialization && data && m_impl.data()) { + Index contiguous_values = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + contiguous_values *= dimensions()[i]; + if (dimensions()[i] != m_impl.dimensions()[i]) { + break; + } + } + } else { + for (int i = NumDims-1; i >= 0; --i) { + contiguous_values *= dimensions()[i]; + if (dimensions()[i] != m_impl.dimensions()[i]) { + break; + } + } + } + // Use memcpy if it's going to be faster than using the regular evaluation. 
+ const MemcpyTriggerForSlicing trigger(m_device); + if (trigger(contiguous_values)) { + Scalar* src = (Scalar*)m_impl.data(); + for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { + Index offset = srcCoeff(i); + m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar)); + } + return false; + } + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_fastOutputStrides[i]; + const Index idx1 = indices[1] / m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[0]); + inputIndices[1] += (indices[1] + m_offsets[0]); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_fastOutputStrides[i]; + const Index idx1 = indices[1] / m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); + inputIndices[1] += (indices[1] + 
m_offsets[NumDims-1]); + } + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[packetSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < packetSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { + Scalar* result = m_impl.data(); + if (result) { + Index offset = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (m_dimensions[i] != m_impl.dimensions()[i]) { + offset += m_offsets[i] * m_inputStrides[i]; + for (int j = i+1; j < NumDims; ++j) { + if (m_dimensions[j] > 1) { + return NULL; + } + offset += m_offsets[j] * m_inputStrides[j]; + } + break; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (m_dimensions[i] != m_impl.dimensions()[i]) { + offset += m_offsets[i] * m_inputStrides[i]; + for (int j = i-1; j >= 0; --j) { + if (m_dimensions[j] > 1) { + return NULL; + } + offset += m_offsets[j] * m_inputStrides[j]; + } + break; + } + } + } + return result + offset; + } + return NULL; + } + /// used by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator& impl() const{ + return m_impl; + } + /// used by sycl + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const StartIndices& startIndices() const{ + return m_offsets; + } + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / 
m_fastOutputStrides[i]; + inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += (index + m_offsets[0]); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += (index + m_offsets[NumDims-1]); + } + return inputIndex; + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Device& m_device; + Dimensions m_dimensions; + const StartIndices m_offsets; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Sizes Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + const int packetSize = internal::unpacket_traits::size; + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; 
+ const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; + inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + this->m_offsets[0]); + inputIndices[1] += (indices[1] + this->m_offsets[0]); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; + const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; + inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); + inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); + } + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + this->m_impl.template writePacket(inputIndices[0], x); + } + else { + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + internal::pstore(values, x); + this->m_impl.coeffRef(inputIndices[0]) = values[0]; + this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; + for (int i = 1; i < packetSize-1; ++i) { + this->coeffRef(index+i) = values[i]; + } + } + } +}; + + + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorStridingSlicingOp& type; +}; + +template +struct nested, 1, 
typename eval >::type> +{ + typedef TensorStridingSlicingOp type; +}; + +} // end namespace internal + + +template +class TensorStridingSlicingOp : public TensorBase > +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( + const XprType& expr, const StartIndices& startIndices, + const StopIndices& stopIndices, const Strides& strides) + : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), + m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_startIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& stopIndices() const { return m_stopIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run( + assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run( + assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const StartIndices m_startIndices; + const StopIndices m_stopIndices; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + // Alignment can't be 
guaranteed at compile time since it depends on the + // slice offsets and sizes. + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) + { + // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero + DSizes startIndicesClamped, stopIndicesClamped; + for (size_t i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); + if(m_strides[i]>0){ + startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); + stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); + }else{ + /* implies m_strides[i]<0 by assert */ + startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); + stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); + } + m_startIndices[i] = startIndicesClamped[i]; + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // check for degenerate intervals and compute output tensor shape + bool degenerate = false;; + for(int i = 0; i < NumDims; i++){ + Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; + if(interval == 0 || ((interval<0) != (m_strides[i]<0))){ + m_dimensions[i] = 0; + degenerate = true; + }else{ + m_dimensions[i] = interval / m_strides[i] + + (interval % m_strides[i] != 0 ? 
1 : 0); + eigen_assert(m_dimensions[i] >= 0); + } + } + Strides output_dims = m_dimensions; + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = m_strides[0]; + m_offsets[0] = startIndicesClamped[0]; + Index previousDimProduct = 1; + for (int i = 1; i < NumDims; ++i) { + previousDimProduct *= input_dims[i-1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor(degenerate ? 1 : m_outputStrides[i]); + } + } else { + m_inputStrides[NumDims-1] = m_strides[NumDims-1]; + m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; + Index previousDimProduct = 1; + for (int i = NumDims - 2; i >= 0; --i) { + previousDimProduct *= input_dims[i+1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor(degenerate ? 
1 : m_outputStrides[i]); + } + } + m_block_total_size_max = numext::maxi(static_cast(1), + device.lastLevelCacheSize() / + sizeof(Scalar)); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNonConst; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { + return NULL; + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i >= 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } else { + for (int i = 0; i < NumDims; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } + return inputIndex; + } + + static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { + return numext::maxi(min, numext::mini(max,value)); + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + TensorEvaluator m_impl; 
+ const Device& m_device; + DSizes m_startIndices; // clamped startIndices + DSizes m_dimensions; + DSizes m_offsets; // offset in a flattened shape + const Strides m_strides; + std::size_t m_block_total_size_max; +}; + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = TensorEvaluator::CoordAccess, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNonConst; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h new file mode 100644 index 000000000..647bcf108 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -0,0 +1,397 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H +#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H + +namespace Eigen { + +/** \class TensorPadding + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor padding class. + * At the moment only padding with a constant value is supported. + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorPaddingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorPaddingOp type; +}; + +} // end namespace internal + + + +template +class TensorPaddingOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value) + : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + const PaddingDimensions& padding() const { return m_padding_dims; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const PaddingDimensions m_padding_dims; + const Scalar 
m_padding_value; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorPaddingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = true, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = true, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()) + { + // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead + // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector + // of 1 element first and then pad. 
+ EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Compute dimensions + m_dimensions = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] += m_padding[i].first + m_padding[i].second; + } + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; + } else { + m_inputStrides[NumDims - 1] = 1; + m_outputStrides[NumDims] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1]; + } + m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + eigen_assert(index < dimensions().TotalSize()); + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (isPaddingAtIndexForDim(idx, i)) { + return m_paddingValue; + } + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (isPaddingAtIndexForDim(index, 0)) { + return m_paddingValue; + } + inputIndex += (index - m_padding[0].first); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / 
m_outputStrides[i+1]; + if (isPaddingAtIndexForDim(idx, i)) { + return m_paddingValue; + } + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i+1]; + } + if (isPaddingAtIndexForDim(index, NumDims-1)) { + return m_paddingValue; + } + inputIndex += (index - m_padding[NumDims-1].first); + } + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + if (static_cast(Layout) == static_cast(ColMajor)) { + return packetColMajor(index); + } + return packetRowMajor(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + TensorOpCost cost = m_impl.costPerCoeff(vectorized); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) + updateCostPerDimension(cost, i, i == 0); + } else { + for (int i = NumDims - 1; i >= 0; --i) + updateCostPerDimension(cost, i, i == NumDims - 1); + } + return cost; + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( + Index index, int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return (!internal::index_pair_first_statically_eq(dim_index, 0) && + index < m_padding[dim_index].first) || + (!internal::index_pair_second_statically_eq(dim_index, 0) && + index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#else + return (index < m_padding[dim_index].first) || + (index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_first_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( + int dim_index) const { +#if 
defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_second_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + + void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { + const double in = static_cast(m_impl.dimensions()[i]); + const double out = in + m_padding[i].first + m_padding[i].second; + if (out == 0) + return; + const double reduction = in / out; + cost *= reduction; + if (first) { + cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost() + + reduction * (1 * TensorOpCost::AddCost())); + } else { + cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + reduction * (2 * TensorOpCost::MulCost() + + 1 * TensorOpCost::DivCost())); + } + } + + protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index first = index; + const Index last = index + PacketSize - 1; + const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; + const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; + const Index lastPaddedRight = m_outputStrides[i+1]; + + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. 
+ const Index idx = index / m_outputStrides[i]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index last = index + PacketSize - 1; + const Index first = index; + const Index lastPaddedLeft = m_padding[0].first; + const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); + const Index lastPaddedRight = m_outputStrides[1]; + + if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[0].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + + for (int i = 0; i < NumDims - 1; ++i) { + const Index first = index; + const Index last = index + PacketSize - 1; + const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1]; + const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; + const Index lastPaddedRight = m_outputStrides[i]; + + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. 
+ return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + const Index idx = index / m_outputStrides[i+1]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i+1]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index last = index + PacketSize - 1; + const Index first = index; + const Index lastPaddedLeft = m_padding[NumDims-1].first; + const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); + const Index lastPaddedRight = m_outputStrides[NumDims-1]; + + if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. 
+ inputIndex += (index - m_padding[NumDims-1].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + PaddingDimensions m_padding; + + Scalar m_paddingValue; +}; + + + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h new file mode 100644 index 000000000..886a254f6 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -0,0 +1,269 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H + +namespace Eigen { + +/** \class TensorPatch + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor patch class. 
+ * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorPatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorPatchOp type; +}; + +} // end namespace internal + + + +template +class TensorPatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims) + : m_xpr(expr), m_patch_dims(patch_dims) {} + + EIGEN_DEVICE_FUNC + const PatchDim& patch_dims() const { return m_patch_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const PatchDim m_patch_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorPatchOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value + 1; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = 
internal::unpacket_traits::size; + + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + Index num_patches = 1; + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const PatchDim& patch_dims = op.patch_dims(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims-1; ++i) { + m_dimensions[i] = patch_dims[i]; + num_patches *= (input_dims[i] - patch_dims[i] + 1); + } + m_dimensions[NumDims-1] = num_patches; + + m_inputStrides[0] = 1; + m_patchStrides[0] = 1; + for (int i = 1; i < NumDims-1; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1); + } + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + for (int i = 0; i < NumDims-1; ++i) { + m_dimensions[i+1] = patch_dims[i]; + num_patches *= (input_dims[i] - patch_dims[i] + 1); + } + m_dimensions[0] = num_patches; + + m_inputStrides[NumDims-2] = 1; + m_patchStrides[NumDims-2] = 1; + for (int i = NumDims-3; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1); + } + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + 
m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? NumDims - 1 : 0; + // Find the location of the first element of the patch. + Index patchIndex = index / m_outputStrides[output_stride_index]; + // Find the offset of the element wrt the location of the first element. + Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 2; i > 0; --i) { + const Index patchIdx = patchIndex / m_patchStrides[i]; + patchIndex -= patchIdx * m_patchStrides[i]; + const Index offsetIdx = patchOffset / m_outputStrides[i]; + patchOffset -= offsetIdx * m_outputStrides[i]; + inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; + } + } else { + for (int i = 0; i < NumDims - 2; ++i) { + const Index patchIdx = patchIndex / m_patchStrides[i]; + patchIndex -= patchIdx * m_patchStrides[i]; + const Index offsetIdx = patchOffset / m_outputStrides[i+1]; + patchOffset -= offsetIdx * m_outputStrides[i+1]; + inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; + } + } + inputIndex += (patchIndex + patchOffset); + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? 
NumDims - 1 : 0; + Index indices[2] = {index, index + PacketSize - 1}; + Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], + indices[1] / m_outputStrides[output_stride_index]}; + Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], + indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]}; + + Index inputIndices[2] = {0, 0}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 2; i > 0; --i) { + const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], + patchIndices[1] / m_patchStrides[i]}; + patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; + patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; + + const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i], + patchOffsets[1] / m_outputStrides[i]}; + patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i]; + patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i]; + + inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; + inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; + } + } else { + for (int i = 0; i < NumDims - 2; ++i) { + const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], + patchIndices[1] / m_patchStrides[i]}; + patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; + patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; + + const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1], + patchOffsets[1] / m_outputStrides[i+1]}; + patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1]; + patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1]; + + inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; + inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; + } + } + inputIndices[0] += (patchIndices[0] + patchOffsets[0]); + inputIndices[1] += (patchIndices[1] + patchOffsets[1]); + + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; 
+ } + else { + EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[PacketSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < PacketSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (TensorOpCost::DivCost() + + TensorOpCost::MulCost() + + 2 * TensorOpCost::AddCost()); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + array m_patchStrides; + + TensorEvaluator m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h new file mode 100644 index 000000000..1655a813e --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -0,0 +1,276 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H +#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H + +namespace Eigen { +namespace internal { + +namespace { + +EIGEN_DEVICE_FUNC uint64_t get_random_seed() { +#ifdef __CUDA_ARCH__ + // We don't support 3d kernels since we currently only use 1 and + // 2d kernels. 
+ assert(threadIdx.z == 0); + return clock64() + + blockIdx.x * blockDim.x + threadIdx.x + + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); + +#elif defined _WIN32 + // Use the current time as a baseline. + SYSTEMTIME st; + GetSystemTime(&st); + int time = st.wSecond + 1000 * st.wMilliseconds; + // Mix in a random number to make sure that we get different seeds if + // we try to generate seeds faster than the clock resolution. + // We need 2 random values since the generator only generate 16 bits at + // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) + int rnd1 = ::rand(); + int rnd2 = ::rand(); + uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; + return rnd; + +#elif defined __APPLE__ + // Same approach as for win32, except that the random number generator + // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). + uint64_t rnd = ::random() ^ mach_absolute_time(); + return rnd; + +#else + // Augment the current time with pseudo random number generation + // to ensure that we get different seeds if we try to generate seeds + // faster than the clock resolution. + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + uint64_t rnd = ::random() ^ ts.tv_nsec; + return rnd; +#endif +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { + // TODO: Unify with the implementation in the non blocking thread pool. + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { + seed = seed ? 
seed : get_random_seed(); + return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; +} + +} // namespace + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeUniform(uint64_t* state) { + unsigned rnd = PCG_XSH_RS_generator(state); + return static_cast(rnd); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::half RandomToTypeUniform(uint64_t* state) { + Eigen::half result; + // Generate 10 random bits for the mantissa + unsigned rnd = PCG_XSH_RS_generator(state); + result.x = static_cast(rnd & 0x3ffu); + // Set the exponent + result.x |= (static_cast(15) << 10); + // Return the final result + return result - Eigen::half(1.0f); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float RandomToTypeUniform(uint64_t* state) { + typedef union { + uint32_t raw; + float fp; + } internal; + internal result; + // Generate 23 random bits for the mantissa mantissa + const unsigned rnd = PCG_XSH_RS_generator(state); + result.raw = rnd & 0x7fffffu; + // Set the exponent + result.raw |= (static_cast(127) << 23); + // Return the final result + return result.fp - 1.0f; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double RandomToTypeUniform(uint64_t* state) { + typedef union { + uint64_t raw; + double dp; + } internal; + internal result; + result.raw = 0; + // Generate 52 random bits for the mantissa + // First generate the upper 20 bits + unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; + // The generate the lower 32 bits + unsigned rnd2 = PCG_XSH_RS_generator(state); + result.raw = (static_cast(rnd1) << 32) | rnd2; + // Set the exponent + result.raw |= (static_cast(1023) << 52); + // Return the final result + return result.dp - 1.0; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* 
state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} + +template class UniformRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + const UniformRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeUniform(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeUniform(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + +template +struct functor_traits > { + enum { + // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). + Cost = 12 * NumTraits::AddCost * + ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), + PacketAccess = UniformRandomGenerator::PacketAccess + }; +}; + + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeNormal(uint64_t* state) { + // Use the ratio of uniform method to generate numbers following a normal + // distribution. See for example Numerical Recipes chapter 7.3.9 for the + // details. 
+ T u, v, q; + do { + u = RandomToTypeUniform(state); + v = T(1.7156) * (RandomToTypeUniform(state) - T(0.5)); + const T x = u - T(0.449871); + const T y = numext::abs(v) + T(0.386595); + q = x*x + y * (T(0.196)*y - T(0.25472)*x); + } while (q > T(0.27597) && + (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); + + return v/u; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} + + +template class NormalRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( + const NormalRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeNormal(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeNormal(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + + +template +struct functor_traits > { + enum { + // On average, we need to generate about 3 random numbers + // 15 mul, 8 add, 1.5 logs + Cost = 3 * functor_traits >::Cost + + 15 * NumTraits::AddCost + 8 * 
NumTraits::AddCost + + 3 * functor_traits >::Cost / 2, + PacketAccess = NormalRandomGenerator::PacketAccess + }; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h new file mode 100644 index 000000000..41d0d0022 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -0,0 +1,781 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H + +namespace Eigen { + +/** \class TensorReduction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reduction class. + * + */ + +namespace internal { + template class MakePointer_ > + struct traits > + : traits +{ + typedef traits XprTraits; + typedef typename XprTraits::Scalar Scalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; + + template struct MakePointer { + // Intermediate typedef to workaround MSVC issue. 
+ typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorReductionOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorReductionOp type; +}; + + +template struct DimInitializer { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, + const array::value>& reduced, + OutputDims* output_dims, ReducedDims* reduced_dims) { + const int NumInputDims = internal::array_size::value; + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + (*reduced_dims)[reduceIndex] = input_dims[i]; + ++reduceIndex; + } else { + (*output_dims)[outputIndex] = input_dims[i]; + ++outputIndex; + } + } + } +}; + +template <> struct DimInitializer > { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, const array&, + Sizes<>*, array* reduced_dims) { + const int NumInputDims = internal::array_size::value; + for (int i = 0; i < NumInputDims; ++i) { + (*reduced_dims)[i] = input_dims[i]; + } + } +}; + + +template +struct are_inner_most_dims { + static const bool value = false; +}; +template +struct preserve_inner_most_dims { + static const bool value = false; +}; + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES +template +struct are_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_eq(0, 0); + static const bool tmp3 = index_statically_eq(array_size::value-1, array_size::value-1); + static const bool value = tmp1 & tmp2 & tmp3; +}; +template +struct are_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_eq(0, NumTensorDims - array_size::value); + static const bool tmp3 = index_statically_eq(array_size::value - 1, NumTensorDims - 1); + static const bool value = tmp1 & 
tmp2 & tmp3; + +}; +template +struct preserve_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_gt(0, 0); + static const bool value = tmp1 & tmp2; + +}; +template +struct preserve_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_lt(array_size::value - 1, NumTensorDims - 1); + static const bool value = tmp1 & tmp2; +}; +#endif + + +template +struct GenericDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; + GenericDimReducer::reduce(self, input, reducer, accum); + } + } +}; +template +struct GenericDimReducer<0, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { + for (int j = 0; j < self.m_reducedDims[0]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; + reducer.reduce(self.m_impl.coeff(input), accum); + } + } +}; +template +struct GenericDimReducer<-1, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) { + reducer.reduce(self.m_impl.coeff(index), accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { + typename Self::CoeffReturnType accum = reducer.initialize(); + for (typename Self::Index j = 0; j < 
numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalize(accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { + const int packetSize = internal::unpacket_traits::size; + const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; + typename Self::PacketReturnType p = reducer.template initializePacket(); + for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) { + reducer.reducePacket(self.m_impl.template packet(firstIndex + j), &p); + } + typename Self::CoeffReturnType accum = reducer.initialize(); + for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalizeBoth(accum, p); + } +}; + +template +struct InnerMostDimPreserver { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; + +template +struct InnerMostDimPreserver { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; + InnerMostDimPreserver::reduce(self, input, reducer, accum); + } + } +}; + +template +struct InnerMostDimPreserver<0, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { + for (typename 
Self::Index j = 0; j < self.m_reducedDims[0]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; + reducer.reducePacket(self.m_impl.template packet(input), accum); + } + } +}; +template +struct InnerMostDimPreserver<-1, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; + +// Default full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = false; + + static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { + const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); + *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + } +}; + + +#ifdef EIGEN_USE_THREADS +// Multithreaded full reducers +template +struct FullReducerShard { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex, + typename Self::Index numValuesToReduce, Op& reducer, + typename Self::CoeffReturnType* output) { + *output = InnerMostDimReducer::reduce( + self, firstIndex, numValuesToReduce, reducer); + } +}; + +// Multithreaded full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = !Op::IsStateful; + static const int PacketSize = + unpacket_traits::size; + + // launch one reducer per thread and accumulate the result. 
+ static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, + typename Self::CoeffReturnType* output) { + typedef typename Self::Index Index; + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + if (num_coeffs == 0) { + *output = reducer.finalize(reducer.initialize()); + return; + } + const TensorOpCost cost = + self.m_impl.costPerCoeff(Vectorizable) + + TensorOpCost(0, 0, internal::functor_traits::Cost, Vectorizable, + PacketSize); + const int num_threads = TensorCostModel::numThreads( + num_coeffs, cost, device.numThreads()); + if (num_threads == 1) { + *output = + InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + return; + } + const Index blocksize = + std::floor(static_cast(num_coeffs) / num_threads); + const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; + eigen_assert(num_coeffs >= numblocks * blocksize); + + Barrier barrier(internal::convert_index(numblocks)); + MaxSizeVector shards(numblocks, reducer.initialize()); + for (Index i = 0; i < numblocks; ++i) { + device.enqueue_with_barrier(&barrier, &FullReducerShard::run, + self, i * blocksize, blocksize, reducer, + &shards[i]); + } + typename Self::CoeffReturnType finalShard; + if (numblocks * blocksize < num_coeffs) { + finalShard = InnerMostDimReducer::reduce( + self, numblocks * blocksize, num_coeffs - numblocks * blocksize, + reducer); + } else { + finalShard = reducer.initialize(); + } + barrier.Wait(); + + for (Index i = 0; i < numblocks; ++i) { + reducer.reduce(shards[i], &finalShard); + } + *output = reducer.finalize(finalShard); + } +}; + +#endif + + +// Default inner reducer +template +struct InnerReducer { + static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; + +// Default outer reducer +template +struct OuterReducer { + 
static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); + + +#ifdef EIGEN_HAS_CUDA_FP16 +template +__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); +template +__global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); +template +__global__ void InnerReductionKernelHalfFloat(R, const S, I, I, half*); + +#endif + +template +__global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + +template +__global__ void OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); +#endif + +} // end namespace internal + + +template class MakePointer_> +class TensorReductionOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) + { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const XprType& expression() const { return m_expr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dims& dims() const { return m_dims; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Op& reducer() 
const { return m_reducer; } + + protected: + typename XprType::Nested m_expr; + const Dims m_dims; + const Op m_reducer; +}; + + +// Eval as rvalue +template class MakePointer_, typename Device> +struct TensorEvaluator, Device> +{ + typedef TensorReductionOp XprType; + typedef typename XprType::Index Index; + typedef ArgType ChildType; + typedef typename TensorEvaluator::Dimensions InputDimensions; + static const int NumInputDims = internal::array_size::value; + static const int NumReducedDims = internal::array_size::value; + static const int NumOutputDims = NumInputDims - NumReducedDims; + typedef typename internal::conditional, DSizes >::type Dimensions; + typedef typename XprType::Scalar Scalar; + typedef TensorEvaluator, Device> Self; + static const bool InputPacketAccess = TensorEvaluator::PacketAccess; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = Self::InputPacketAccess && Op::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + static const bool ReducingInnerMostDims = internal::are_inner_most_dims::value; + static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims::value; + static const bool RunningFullReduction = (NumOutputDims==0); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device), m_xpr_dims(op.dims()) + { + EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Build the bitmap indicating if an input dimension is reduced or not. 
+ for (int i = 0; i < NumInputDims; ++i) { + m_reduced[i] = false; + } + for (int i = 0; i < NumReducedDims; ++i) { + eigen_assert(op.dims()[i] >= 0); + eigen_assert(op.dims()[i] < NumInputDims); + m_reduced[op.dims()[i]] = true; + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + internal::DimInitializer::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims); + + // Precompute output strides. + if (NumOutputDims > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_outputStrides.back() = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } + } + } + + // Precompute input strides. + if (NumInputDims > 0) { + array input_strides; + if (static_cast(Layout) == static_cast(ColMajor)) { + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } + } else { + input_strides.back() = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } + } + + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (m_reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + ++outputIndex; + } + } + } + + // Special case for full reductions + if (NumOutputDims == 0) { + m_preservedStrides[0] = internal::array_prod(input_dims); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(typename MakePointer_::Type data) { + m_impl.evalSubExprsIfNeeded(NULL); + + // Use the FullReducer if possible. 
+ if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction && + internal::FullReducer::HasOptimizedImplementation && + ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || + !RunningOnGPU))) { + bool need_assign = false; + if (!data) { + m_result = static_cast(m_device.allocate(sizeof(CoeffReturnType))); + data = m_result; + need_assign = true; + } + Op reducer(m_reducer); + internal::FullReducer::run(*this, reducer, m_device, data); + return need_assign; + } + else if(RunningOnSycl){ + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + Op reducer(m_reducer); + internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); + return (m_result != NULL); + } + + // Attempt to use an optimized reduction. + else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { + bool reducing_inner_dims = true; + for (int i = 0; i < NumReducedDims; ++i) { + if (static_cast(Layout) == static_cast(ColMajor)) { + reducing_inner_dims &= m_reduced[i]; + } else { + reducing_inner_dims &= m_reduced[NumInputDims - 1 - i]; + } + } + if (internal::InnerReducer::HasOptimizedImplementation && + (reducing_inner_dims || ReducingInnerMostDims)) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } + } + Op reducer(m_reducer); + if (internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if 
(m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } + } + + bool preserving_inner_dims = true; + for (int i = 0; i < NumReducedDims; ++i) { + if (static_cast(Layout) == static_cast(ColMajor)) { + preserving_inner_dims &= m_reduced[NumInputDims - 1 - i]; + } else { + preserving_inner_dims &= m_reduced[i]; + } + } + if (internal::OuterReducer::HasOptimizedImplementation && + preserving_inner_dims) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } + } + Op reducer(m_reducer); + if (internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } + } + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) { + return *(m_result + index); + } + Op reducer(m_reducer); + if (ReducingInnerMostDims || RunningFullReduction) { + const Index num_values_to_reduce = + (static_cast(Layout) == static_cast(ColMajor)) ? 
m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + return internal::InnerMostDimReducer::reduce(*this, firstInput(index), + num_values_to_reduce, reducer); + } else { + typename Self::CoeffReturnType accum = reducer.initialize(); + internal::GenericDimReducer::reduce(*this, firstInput(index), reducer, &accum); + return reducer.finalize(accum); + } + } + + // TODO(bsteiner): provide a more efficient implementation. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); + + if (RunningOnGPU && m_result) { + return internal::pload(m_result + index); + } + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + if (ReducingInnerMostDims) { + const Index num_values_to_reduce = + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + const Index firstIndex = firstInput(index); + for (Index i = 0; i < PacketSize; ++i) { + Op reducer(m_reducer); + values[i] = internal::InnerMostDimReducer::reduce(*this, firstIndex + i * num_values_to_reduce, + num_values_to_reduce, reducer); + } + } else if (PreservingInnerMostDims) { + const Index firstIndex = firstInput(index); + const int innermost_dim = (static_cast(Layout) == static_cast(ColMajor)) ? 0 : NumOutputDims - 1; + // TBD: extend this the the n innermost dimensions that we preserve. 
+ if (((firstIndex % m_dimensions[innermost_dim]) + PacketSize - 1) < m_dimensions[innermost_dim]) { + Op reducer(m_reducer); + typename Self::PacketReturnType accum = reducer.template initializePacket(); + internal::InnerMostDimPreserver::reduce(*this, firstIndex, reducer, &accum); + return reducer.finalizePacket(accum); + } else { + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + } + } else { + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + // Must be called after evalSubExprsIfNeeded(). + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + if (RunningFullReduction && m_result) { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } else { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const double compute_cost = num_values_to_reduce * internal::functor_traits::Cost; + return m_impl.costPerCoeff(vectorized) * num_values_to_reduce + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + } + + EIGEN_DEVICE_FUNC typename MakePointer_::Type data() const { return m_result; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + /// added for sycl in order to re-construct the reduction eval on the device for the sub-kernel + const Dims& xprDims() const {return m_xpr_dims;} + + + private: + template friend struct internal::GenericDimReducer; + template friend struct internal::InnerMostDimReducer; + template friend struct internal::InnerMostDimPreserver; + template friend struct internal::FullReducer; +#ifdef EIGEN_USE_THREADS + template friend struct internal::FullReducerShard; +#endif +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + template 
friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); +#ifdef EIGEN_HAS_CUDA_FP16 + template friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); + template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); + template friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); +#endif + template friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + + template friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); +#endif + + template friend struct internal::InnerReducer; + + // Returns the Index in the input tensor of the first value that needs to be + // used to compute the reduction at output index "index". + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + if (ReducingInnerMostDims) { + if (static_cast(Layout) == static_cast(ColMajor)) { + return index * m_preservedStrides[0]; + } else { + return index * m_preservedStrides[NumPreservedStrides - 1]; + } + } + // TBD: optimize the case where we preserve the innermost dimensions. + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumOutputDims - 1; i > 0; --i) { + // This is index_i in the output tensor. + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[0] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[0]; + } + } else { + for (int i = 0; i < NumOutputDims - 1; ++i) { + // This is index_i in the output tensor. 
+ const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[NumPreservedStrides - 1]; + } + } + return startInput; + } + + // Bitmap indicating if an input dimension is reduced or not. + array m_reduced; + // Dimensions of the output of the operation. + Dimensions m_dimensions; + // Precomputed strides for the output tensor. + array m_outputStrides; + // Subset of strides of the input tensor for the non-reduced dimensions. + // Indexed by output dimensions. + static const int NumPreservedStrides = max_n_1::size; + array m_preservedStrides; + + // Subset of strides of the input tensor for the reduced dimensions. + // Indexed by reduced dimensions. + array m_reducedStrides; + // Size of the input dimensions that are reduced. + // Indexed by reduced dimensions. + array m_reducedDims; + + // Evaluator for the input expression. + TensorEvaluator m_impl; + + // Operation to apply for computing the reduction. 
+ Op m_reducer; + + // For full reductions +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + static const bool RunningOnGPU = internal::is_same::value; + static const bool RunningOnSycl = false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; +#else + static const bool RunningOnGPU = false; + static const bool RunningOnSycl = false; +#endif + typename MakePointer_::Type m_result; + + const Device& m_device; + const Dims& m_xpr_dims; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H diff --git a/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h new file mode 100644 index 000000000..65638b6a8 --- /dev/null +++ b/thirdparty/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -0,0 +1,750 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H + +namespace Eigen { +namespace internal { + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple cuda thread to safely accumulate at the same +// output address. It basically reads the current value of the output variable, and +// attempts to update it with the new value. If in the meantime another cuda thread +// updated the content of the output address it will try again. 
+template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if __CUDA_ARCH__ >= 300 + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + assert(0 && "Wordsize not supported"); + } +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +// We extend atomicExch to support extra data types +template +__device__ inline Type atomicExchCustom(Type* address, Type val) { + return atomicExch(address, val); +} + +template <> +__device__ inline double atomicExchCustom(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast(address); + return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); +} + +#ifdef EIGEN_HAS_CUDA_FP16 +template